Merge branch 'akpm' (patches from Andrew Morton)

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 13 Nov 2013 06:45:43 +0000 (15:45 +0900)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 13 Nov 2013 06:45:43 +0000 (15:45 +0900)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Nov 2013 06:45:43 +0000 (15:45 +0900)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Nov 2013 06:45:43 +0000 (15:45 +0900)
diff --git a/Documentation/devices.txt b/Documentation/devices.txt

index 23721d3..80b7241 100644 (file)
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -414,6 +414,7 @@ Your cooperation is appreciated.
                 200 = /dev/net/tun      TAP/TUN network device
                 201 = /dev/button/gulpb Transmeta GULP-B buttons
                 202 = /dev/emd/ctl      Enhanced Metadisk RAID (EMD) control
+               203 = /dev/cuse         Cuse (character device in user-space)
                 204 = /dev/video/em8300         EM8300 DVD decoder control
                 205 = /dev/video/em8300_mv      EM8300 DVD decoder video
                 206 = /dev/video/em8300_ma      EM8300 DVD decoder audio
diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking

index ff7b611..09bbf9a 100644 (file)
--- a/Documentation/filesystems/directory-locking
+++ b/Documentation/filesystems/directory-locking
@@ -2,6 +2,10 @@
  kinds of locks - per-inode (->i_mutex) and per-filesystem
  (->s_vfs_rename_mutex).
  
+       When taking the i_mutex on multiple non-directory objects, we
+always acquire the locks in order by increasing address.  We'll call
+that "inode pointer" order in the following.
+
         For our purposes all operations fall in 5 classes:
  
  1) read access.  Locking rules: caller locks directory we are accessing.
@@ -12,8 +16,9 @@ kinds of locks - per-inode (->i_mutex) and per-filesystem
  locks victim and calls the method.
  
  4) rename() that is _not_ cross-directory.  Locking rules: caller locks
-the parent, finds source and target, if target already exists - locks it
-and then calls the method.
+the parent and finds source and target.  If target already exists, lock
+it.  If source is a non-directory, lock it.  If that means we need to
+lock both, lock them in inode pointer order.
  
  5) link creation.  Locking rules:
         * lock parent
@@ -30,7 +35,9 @@ rules:
                 fail with -ENOTEMPTY
         * if new parent is equal to or is a descendent of source
                 fail with -ELOOP
-       * if target exists - lock it.
+       * If target exists, lock it.  If source is a non-directory, lock
+         it.  In case that means we need to lock both source and target,
+         do so in inode pointer order.
         * call the method.
  
  
@@ -56,9 +63,11 @@ objects - A < B iff A is an ancestor of B.
      renames will be blocked on filesystem lock and we don't start changing
      the order until we had acquired all locks).
  
-(3) any operation holds at most one lock on non-directory object and
-    that lock is acquired after all other locks.  (Proof: see descriptions
-    of operations).
+(3) locks on non-directory objects are acquired only after locks on
+    directory objects, and are acquired in inode pointer order.
+    (Proof: all operations other than renames take a lock on at most
+    one non-directory object; renames take locks on source and target
+    in inode pointer order in the case they are not directories.)
  
         Now consider the minimal deadlock.  Each process is blocked on
  attempt to acquire some lock and already holds at least one lock.  Let's
@@ -66,9 +75,13 @@ consider the set of contended locks.  First of all, filesystem lock is
  not contended, since any process blocked on it is not holding any locks.
  Thus all processes are blocked on ->i_mutex.
  
-       Non-directory objects are not contended due to (3).  Thus link
-creation can't be a part of deadlock - it can't be blocked on source
-and it means that it doesn't hold any locks.
+       By (3), any process holding a non-directory lock can only be
+waiting on another non-directory lock with a larger address.  Therefore
+the process holding the "largest" such lock can always make progress, and
+non-directory objects are not included in the set of contended locks.
+
+       Thus link creation can't be a part of deadlock - it can't be
+blocked on source and it means that it doesn't hold any locks.
  
         Any contended object is either held by cross-directory rename or
  has a child that is also contended.  Indeed, suppose that it is held by
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt

index 3cd27be..a3fe811 100644 (file)
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -119,6 +119,7 @@ active_logs=%u         Support configuring the number of active logs. In the
                         Default number is 6.
  disable_ext_identify   Disable the extension list configured by mkfs, so f2fs
                         is not aware of cold files such as media files.
+inline_xattr           Enable the inline xattrs feature.
  
  ================================================================================
  DEBUGFS ENTRIES
@@ -164,6 +165,12 @@ Files in /sys/fs/f2fs/<devname>
                                gc_idle = 1 will select the Cost Benefit approach
                                & setting gc_idle = 2 will select the greedy approach.
  
+ reclaim_segments             This parameter controls the number of prefree
+                              segments to be reclaimed. If the number of prefree
+                             segments is larger than this number, f2fs tries to
+                             conduct checkpoint to reclaim the prefree segments
+                             to free segments. By default, 100 segments, 200MB.
+
  ================================================================================
  USAGE
  ================================================================================
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting

index f089058..fe2b7ae 100644 (file)
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -455,3 +455,11 @@ in your dentry operations instead.
         vfs_follow_link has been removed.  Filesystems must use nd_set_link
         from ->follow_link for normal symlinks, or nd_jump_link for magic
         /proc/<pid> style links.
+--
+[mandatory]
+       iget5_locked()/ilookup5()/ilookup5_nowait() test() callback used to be
+       called with both ->i_lock and inode_hash_lock held; the former is *not*
+       taken anymore, so verify that your callbacks do not rely on it (none
+       of the in-tree instances did).  inode_hash_lock is still held,
+       of course, so they are still serialized wrt removal from inode hash,
+       as well as wrt set() callback of iget5_locked().
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c

index e51bbe7..b3fc9f5 100644 (file)
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -122,7 +122,7 @@ static inline int get_sigset_t(sigset_t *set,
         return 0;
  }
  
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
  {
         int err;
  
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c

index bac1639..04bc8fd 100644 (file)
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -11,8 +11,7 @@ Elf64_Half elf_core_extra_phdrs(void)
         return GATE_EHDR->e_phnum;
  }
  
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-                              unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
  {
         const struct elf_phdr *const gate_phdrs =
                 (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -35,15 +34,13 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
                         phdr.p_offset += ofs;
                 }
                 phdr.p_paddr = 0; /* match other core phdrs */
-               *size += sizeof(phdr);
-               if (*size > limit || !dump_write(file, &phdr, sizeof(phdr)))
+               if (!dump_emit(cprm, &phdr, sizeof(phdr)))
                         return 0;
         }
         return 1;
  }
  
-int elf_core_write_extra_data(struct file *file, size_t *size,
-                             unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
  {
         const struct elf_phdr *const gate_phdrs =
                 (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -54,8 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
                         void *addr = (void *)gate_phdrs[i].p_vaddr;
                         size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
  
-                       *size += memsz;
-                       if (*size > limit || !dump_write(file, addr, memsz))
+                       if (!dump_emit(cprm, addr, memsz))
                                 return 0;
                         break;
                 }
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c

index 3637e03..33cab9a 100644 (file)
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -105,7 +105,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
  }
  
  int
-copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
  {
         if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
                 return -EFAULT;
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c

index 57de8b7..1905a41 100644 (file)
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -314,7 +314,7 @@ SYSCALL_DEFINE3(32_sigaction, long, sig, const struct compat_sigaction __user *,
         return ret;
  }
  
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
  {
         int err;
  
diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c

index 6c6a271..984abbe 100644 (file)
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c
@@ -319,7 +319,7 @@ copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from)
  }
  
  int
-copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from)
  {
         compat_uptr_t addr;
         compat_int_t val;
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h

index 72ab41a..af51d4c 100644 (file)
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -34,7 +34,7 @@ struct compat_ucontext {
  
  /* ELF32 signal handling */
  
-int copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from);
+int copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from);
  int copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from);
  
  /* In a deft move of uber-hackery, we decide to carry the top half of all
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h

index 93f280e..37b7ca3 100644 (file)
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -235,6 +235,7 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
  
  /* syscalls implemented in spufs */
  struct file;
+struct coredump_params;
  struct spufs_calls {
         long (*create_thread)(const char __user *name,
                                         unsigned int flags, umode_t mode,
@@ -242,7 +243,7 @@ struct spufs_calls {
         long (*spu_run)(struct file *filp, __u32 __user *unpc,
                                                 __u32 __user *ustatus);
         int (*coredump_extra_notes_size)(void);
-       int (*coredump_extra_notes_write)(struct file *file, loff_t *foffset);
+       int (*coredump_extra_notes_write)(struct coredump_params *cprm);
         void (*notify_spus_active)(void);
         struct module *owner;
  };
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c

index 1a410aa..749778e 100644 (file)
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -893,7 +893,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
  #endif
  
  #ifdef CONFIG_PPC64
-int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s)
+int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
  {
         int err;
  
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c

index db4e638..3844f13 100644 (file)
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -25,6 +25,7 @@
  #include <linux/module.h>
  #include <linux/syscalls.h>
  #include <linux/rcupdate.h>
+#include <linux/binfmts.h>
  
  #include <asm/spu.h>
  
@@ -126,7 +127,7 @@ int elf_coredump_extra_notes_size(void)
         return ret;
  }
  
-int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
  {
         struct spufs_calls *calls;
         int ret;
@@ -135,7 +136,7 @@ int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
         if (!calls)
                 return 0;
  
-       ret = calls->coredump_extra_notes_write(file, foffset);
+       ret = calls->coredump_extra_notes_write(cprm);
  
         spufs_calls_put(calls);
  
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c

index c9500ea..be6212d 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -27,6 +27,8 @@
  #include <linux/gfp.h>
  #include <linux/list.h>
  #include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
  
  #include <asm/uaccess.h>
  
@@ -48,44 +50,6 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
         return ++ret; /* count trailing NULL */
  }
  
-/*
- * These are the only things you should do on a core-file: use only these
- * functions to write out all the necessary info.
- */
-static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
-{
-       unsigned long limit = rlimit(RLIMIT_CORE);
-       ssize_t written;
-
-       if (*foffset + nr > limit)
-               return -EIO;
-
-       written = file->f_op->write(file, addr, nr, &file->f_pos);
-       *foffset += written;
-
-       if (written != nr)
-               return -EIO;
-
-       return 0;
-}
-
-static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
-                           loff_t *foffset)
-{
-       int rc, size;
-
-       size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-       memset(buf, 0, size);
-
-       rc = 0;
-       while (rc == 0 && new_off > *foffset) {
-               size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-               rc = spufs_dump_write(file, buf, size, foffset);
-       }
-
-       return rc;
-}
-
  static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
  {
         int i, sz, total = 0;
@@ -165,10 +129,10 @@ int spufs_coredump_extra_notes_size(void)
  }
  
  static int spufs_arch_write_note(struct spu_context *ctx, int i,
-                                 struct file *file, int dfd, loff_t *foffset)
+                                 struct coredump_params *cprm, int dfd)
  {
         loff_t pos = 0;
-       int sz, rc, nread, total = 0;
+       int sz, rc, total = 0;
         const int bufsz = PAGE_SIZE;
         char *name;
         char fullname[80], *buf;
@@ -186,42 +150,39 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
         en.n_descsz = sz;
         en.n_type = NT_SPU;
  
-       rc = spufs_dump_write(file, &en, sizeof(en), foffset);
-       if (rc)
-               goto out;
+       if (!dump_emit(cprm, &en, sizeof(en)))
+               goto Eio;
  
-       rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
-       if (rc)
-               goto out;
+       if (!dump_emit(cprm, fullname, en.n_namesz))
+               goto Eio;
  
-       rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
-       if (rc)
-               goto out;
+       if (!dump_align(cprm, 4))
+               goto Eio;
  
         do {
-               nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
-               if (nread > 0) {
-                       rc = spufs_dump_write(file, buf, nread, foffset);
-                       if (rc)
-                               goto out;
-                       total += nread;
+               rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
+               if (rc > 0) {
+                       if (!dump_emit(cprm, buf, rc))
+                               goto Eio;
+                       total += rc;
                 }
-       } while (nread == bufsz && total < sz);
+       } while (rc == bufsz && total < sz);
  
-       if (nread < 0) {
-               rc = nread;
+       if (rc < 0)
                 goto out;
-       }
-
-       rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
-                             foffset);
  
+       if (!dump_skip(cprm,
+                      roundup(cprm->written - total + sz, 4) - cprm->written))
+               goto Eio;
  out:
         free_page((unsigned long)buf);
         return rc;
+Eio:
+       free_page((unsigned long)buf);
+       return -EIO;
  }
  
-int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
  {
         struct spu_context *ctx;
         int fd, j, rc;
@@ -233,7 +194,7 @@ int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
                         return rc;
  
                 for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
-                       rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+                       rc = spufs_arch_write_note(ctx, j, cprm, fd);
                         if (rc) {
                                 spu_release_saved(ctx);
                                 return rc;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h

index 67852ad..0ba3c95 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -247,12 +247,13 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
  
  /* system call implementation */
  extern struct spufs_calls spufs_calls;
+struct coredump_params;
  long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
  long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
                         umode_t mode, struct file *filp);
  /* ELF coredump callbacks for writing SPU ELF notes */
  extern int spufs_coredump_extra_notes_size(void);
-extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
  
  extern const struct file_operations spufs_context_fops;
  
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c

index 5a3ab5c..6e24429 100644 (file)
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -49,7 +49,7 @@ typedef struct
         __u32 gprs_high[NUM_GPRS];
  } rt_sigframe32;
  
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
  {
         int err;
  
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c

index b524f91..ee789d2 100644 (file)
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -68,7 +68,7 @@ struct rt_signal_frame32 {
         /* __siginfo_rwin_t * */u32 rwin_save;
  } __attribute__((aligned(8)));
  
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
  {
         int err;
  
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c

index 85e00b2..19c04b5 100644 (file)
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -49,7 +49,7 @@ struct compat_rt_sigframe {
         struct compat_ucontext uc;
  };
  
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from)
  {
         int err;
  
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c

index bae3aba..d21ff89 100644 (file)
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -25,6 +25,7 @@
  #include <linux/personality.h>
  #include <linux/init.h>
  #include <linux/jiffies.h>
+#include <linux/perf_event.h>
  
  #include <asm/uaccess.h>
  #include <asm/pgalloc.h>
@@ -33,14 +34,18 @@
  #include <asm/ia32.h>
  
  #undef WARN_OLD
-#undef CORE_DUMP /* definitely broken */
  
  static int load_aout_binary(struct linux_binprm *);
  static int load_aout_library(struct file *);
  
-#ifdef CORE_DUMP
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
-                         unsigned long limit);
+#ifdef CONFIG_COREDUMP
+static int aout_core_dump(struct coredump_params *);
+
+static unsigned long get_dr(int n)
+{
+       struct perf_event *bp = current->thread.ptrace_bps[n];
+       return bp ? bp->hw.info.address : 0;
+}
  
  /*
   * fill in the user structure for a core dump..
@@ -48,6 +53,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
  static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
  {
         u32 fs, gs;
+       memset(dump, 0, sizeof(*dump));
  
  /* changed the size calculations - should hopefully work better. lbt */
         dump->magic = CMAGIC;
@@ -57,15 +63,12 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
         dump->u_dsize = ((unsigned long)
                          (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
         dump->u_dsize -= dump->u_tsize;
-       dump->u_ssize = 0;
-       dump->u_debugreg[0] = current->thread.debugreg0;
-       dump->u_debugreg[1] = current->thread.debugreg1;
-       dump->u_debugreg[2] = current->thread.debugreg2;
-       dump->u_debugreg[3] = current->thread.debugreg3;
-       dump->u_debugreg[4] = 0;
-       dump->u_debugreg[5] = 0;
+       dump->u_debugreg[0] = get_dr(0);
+       dump->u_debugreg[1] = get_dr(1);
+       dump->u_debugreg[2] = get_dr(2);
+       dump->u_debugreg[3] = get_dr(3);
         dump->u_debugreg[6] = current->thread.debugreg6;
-       dump->u_debugreg[7] = current->thread.debugreg7;
+       dump->u_debugreg[7] = current->thread.ptrace_dr7;
  
         if (dump->start_stack < 0xc0000000) {
                 unsigned long tmp;
@@ -74,24 +77,24 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
                 dump->u_ssize = tmp >> PAGE_SHIFT;
         }
  
-       dump->regs.bx = regs->bx;
-       dump->regs.cx = regs->cx;
-       dump->regs.dx = regs->dx;
-       dump->regs.si = regs->si;
-       dump->regs.di = regs->di;
-       dump->regs.bp = regs->bp;
-       dump->regs.ax = regs->ax;
+       dump->regs.ebx = regs->bx;
+       dump->regs.ecx = regs->cx;
+       dump->regs.edx = regs->dx;
+       dump->regs.esi = regs->si;
+       dump->regs.edi = regs->di;
+       dump->regs.ebp = regs->bp;
+       dump->regs.eax = regs->ax;
         dump->regs.ds = current->thread.ds;
         dump->regs.es = current->thread.es;
         savesegment(fs, fs);
         dump->regs.fs = fs;
         savesegment(gs, gs);
         dump->regs.gs = gs;
-       dump->regs.orig_ax = regs->orig_ax;
-       dump->regs.ip = regs->ip;
+       dump->regs.orig_eax = regs->orig_ax;
+       dump->regs.eip = regs->ip;
         dump->regs.cs = regs->cs;
-       dump->regs.flags = regs->flags;
-       dump->regs.sp = regs->sp;
+       dump->regs.eflags = regs->flags;
+       dump->regs.esp = regs->sp;
         dump->regs.ss = regs->ss;
  
  #if 1 /* FIXME */
@@ -107,7 +110,7 @@ static struct linux_binfmt aout_format = {
         .module         = THIS_MODULE,
         .load_binary    = load_aout_binary,
         .load_shlib     = load_aout_library,
-#ifdef CORE_DUMP
+#ifdef CONFIG_COREDUMP
         .core_dump      = aout_core_dump,
  #endif
         .min_coredump   = PAGE_SIZE
@@ -122,7 +125,7 @@ static void set_brk(unsigned long start, unsigned long end)
         vm_brk(start, end - start);
  }
  
-#ifdef CORE_DUMP
+#ifdef CONFIG_COREDUMP
  /*
   * These are the only things you should do on a core-file: use only these
   * macros to write out all the necessary info.
@@ -130,15 +133,7 @@ static void set_brk(unsigned long start, unsigned long end)
  
  #include <linux/coredump.h>
  
-#define DUMP_WRITE(addr, nr)                        \
-       if (!dump_write(file, (void *)(addr), (nr))) \
-               goto end_coredump;
-
-#define DUMP_SEEK(offset)              \
-       if (!dump_seek(file, offset))   \
-               goto end_coredump;
-
-#define START_DATA()   (u.u_tsize << PAGE_SHIFT)
+#define START_DATA(u)  (u.u_tsize << PAGE_SHIFT)
  #define START_STACK(u) (u.start_stack)
  
  /*
@@ -151,8 +146,7 @@ static void set_brk(unsigned long start, unsigned long end)
   * dumping of the process results in another error..
   */
  
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
-                         unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
  {
         mm_segment_t fs;
         int has_dumped = 0;
@@ -164,19 +158,19 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
         has_dumped = 1;
         strncpy(dump.u_comm, current->comm, sizeof(current->comm));
         dump.u_ar0 = offsetof(struct user32, regs);
-       dump.signal = signr;
-       dump_thread32(regs, &dump);
+       dump.signal = cprm->siginfo->si_signo;
+       dump_thread32(cprm->regs, &dump);
  
         /*
          * If the size of the dump file exceeds the rlimit, then see
          * what would happen if we wrote the stack, but not the data
          * area.
          */
-       if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit)
+       if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
                 dump.u_dsize = 0;
  
         /* Make sure we have enough room to write the stack and data areas. */
-       if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+       if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
                 dump.u_ssize = 0;
  
         /* make sure we actually have a data and stack area to dump */
@@ -190,22 +184,26 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
  
         set_fs(KERNEL_DS);
         /* struct user */
-       DUMP_WRITE(&dump, sizeof(dump));
+       if (!dump_emit(cprm, &dump, sizeof(dump)))
+               goto end_coredump;
         /* Now dump all of the user data.  Include malloced stuff as well */
-       DUMP_SEEK(PAGE_SIZE - sizeof(dump));
+       if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
+               goto end_coredump;
         /* now we start writing out the user space info */
         set_fs(USER_DS);
         /* Dump the data area */
         if (dump.u_dsize != 0) {
                 dump_start = START_DATA(dump);
                 dump_size = dump.u_dsize << PAGE_SHIFT;
-               DUMP_WRITE(dump_start, dump_size);
+               if (!dump_emit(cprm, (void *)dump_start, dump_size))
+                       goto end_coredump;
         }
         /* Now prepare to dump the stack area */
         if (dump.u_ssize != 0) {
                 dump_start = START_STACK(dump);
                 dump_size = dump.u_ssize << PAGE_SHIFT;
-               DUMP_WRITE(dump_start, dump_size);
+               if (!dump_emit(cprm, (void *)dump_start, dump_size))
+                       goto end_coredump;
         }
  end_coredump:
         set_fs(fs);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c

index 665a730..2206757 100644 (file)
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,7 +34,7 @@
  #include <asm/sys_ia32.h>
  #include <asm/smap.h>
  
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
  {
         int err = 0;
         bool ia32 = test_thread_flag(TIF_IA32);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h

index b3e18f8..94220d1 100644 (file)
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -378,9 +378,6 @@ do {                                                                        \
  #define __this_cpu_or_1(pcp, val)      percpu_to_op("or", (pcp), val)
  #define __this_cpu_or_2(pcp, val)      percpu_to_op("or", (pcp), val)
  #define __this_cpu_or_4(pcp, val)      percpu_to_op("or", (pcp), val)
-#define __this_cpu_xor_1(pcp, val)     percpu_to_op("xor", (pcp), val)
-#define __this_cpu_xor_2(pcp, val)     percpu_to_op("xor", (pcp), val)
-#define __this_cpu_xor_4(pcp, val)     percpu_to_op("xor", (pcp), val)
  #define __this_cpu_xchg_1(pcp, val)    percpu_xchg_op(pcp, val)
  #define __this_cpu_xchg_2(pcp, val)    percpu_xchg_op(pcp, val)
  #define __this_cpu_xchg_4(pcp, val)    percpu_xchg_op(pcp, val)
@@ -400,9 +397,6 @@ do {                                                                        \
  #define this_cpu_or_1(pcp, val)                percpu_to_op("or", (pcp), val)
  #define this_cpu_or_2(pcp, val)                percpu_to_op("or", (pcp), val)
  #define this_cpu_or_4(pcp, val)                percpu_to_op("or", (pcp), val)
-#define this_cpu_xor_1(pcp, val)       percpu_to_op("xor", (pcp), val)
-#define this_cpu_xor_2(pcp, val)       percpu_to_op("xor", (pcp), val)
-#define this_cpu_xor_4(pcp, val)       percpu_to_op("xor", (pcp), val)
  #define this_cpu_xchg_1(pcp, nval)     percpu_xchg_op(pcp, nval)
  #define this_cpu_xchg_2(pcp, nval)     percpu_xchg_op(pcp, nval)
  #define this_cpu_xchg_4(pcp, nval)     percpu_xchg_op(pcp, nval)
@@ -447,7 +441,6 @@ do {                                                                        \
  #define __this_cpu_add_8(pcp, val)     percpu_add_op((pcp), val)
  #define __this_cpu_and_8(pcp, val)     percpu_to_op("and", (pcp), val)
  #define __this_cpu_or_8(pcp, val)      percpu_to_op("or", (pcp), val)
-#define __this_cpu_xor_8(pcp, val)     percpu_to_op("xor", (pcp), val)
  #define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
  #define __this_cpu_xchg_8(pcp, nval)   percpu_xchg_op(pcp, nval)
  #define __this_cpu_cmpxchg_8(pcp, oval, nval)  percpu_cmpxchg_op(pcp, oval, nval)
@@ -457,7 +450,6 @@ do {                                                                        \
  #define this_cpu_add_8(pcp, val)       percpu_add_op((pcp), val)
  #define this_cpu_and_8(pcp, val)       percpu_to_op("and", (pcp), val)
  #define this_cpu_or_8(pcp, val)                percpu_to_op("or", (pcp), val)
-#define this_cpu_xor_8(pcp, val)       percpu_to_op("xor", (pcp), val)
  #define this_cpu_add_return_8(pcp, val)        percpu_add_return_op(pcp, val)
  #define this_cpu_xchg_8(pcp, nval)     percpu_xchg_op(pcp, nval)
  #define this_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(pcp, oval, nval)
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c

index 6bb49b6..7bb89a2 100644 (file)
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -11,8 +11,7 @@ Elf32_Half elf_core_extra_phdrs(void)
         return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
  }
  
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-                              unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
  {
         if ( vsyscall_ehdr ) {
                 const struct elfhdr *const ehdrp =
@@ -32,17 +31,14 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
                                 phdr.p_offset += ofs;
                         }
                         phdr.p_paddr = 0; /* match other core phdrs */
-                       *size += sizeof(phdr);
-                       if (*size > limit
-                           || !dump_write(file, &phdr, sizeof(phdr)))
+                       if (!dump_emit(cprm, &phdr, sizeof(phdr)))
                                 return 0;
                 }
         }
         return 1;
  }
  
-int elf_core_write_extra_data(struct file *file, size_t *size,
-                             unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
  {
         if ( vsyscall_ehdr ) {
                 const struct elfhdr *const ehdrp =
@@ -55,10 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
                         if (phdrp[i].p_type == PT_LOAD) {
                                 void *addr = (void *) phdrp[i].p_vaddr;
                                 size_t filesz = phdrp[i].p_filesz;
-
-                               *size += filesz;
-                               if (*size > limit
-                                   || !dump_write(file, addr, filesz))
+                               if (!dump_emit(cprm, addr, filesz))
                                         return 0;
                         }
                 }
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c

index 8e28f92..e2903d0 100644 (file)
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -292,6 +292,10 @@ static const struct pci_device_id ahci_pci_tbl[] = {
         { PCI_VDEVICE(INTEL, 0x8d66), board_ahci }, /* Wellsburg RAID */
         { PCI_VDEVICE(INTEL, 0x8d6e), board_ahci }, /* Wellsburg RAID */
         { PCI_VDEVICE(INTEL, 0x23a3), board_ahci }, /* Coleto Creek AHCI */
+       { PCI_VDEVICE(INTEL, 0x9c83), board_ahci }, /* Wildcat Point-LP AHCI */
+       { PCI_VDEVICE(INTEL, 0x9c85), board_ahci }, /* Wildcat Point-LP RAID */
+       { PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */
+       { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */
  
         /* JMicron 360/1/3/5/6, match class to avoid IDE function */
         { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h

index 1145637..2289efd 100644 (file)
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -339,6 +339,7 @@ extern struct device_attribute *ahci_sdev_attrs[];
         .sdev_attrs             = ahci_sdev_attrs
  
  extern struct ata_port_operations ahci_ops;
+extern struct ata_port_operations ahci_platform_ops;
  extern struct ata_port_operations ahci_pmp_retry_srst_ops;
  
  unsigned int ahci_dev_classify(struct ata_port *ap);
@@ -368,6 +369,7 @@ irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance);
  irqreturn_t ahci_thread_fn(int irq, void *dev_instance);
  void ahci_print_info(struct ata_host *host, const char *scc_s);
  int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis);
+void ahci_error_handler(struct ata_port *ap);
  
  static inline void __iomem *__ahci_port_base(struct ata_host *host,
                                              unsigned int port_no)
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c

index 58debb0..ae2d73f 100644 (file)
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -1,6 +1,6 @@
  /*
+ * copyright (c) 2013 Freescale Semiconductor, Inc.
   * Freescale IMX AHCI SATA platform driver
- * Copyright 2013 Freescale Semiconductor, Inc.
   *
   * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov
   *
@@ -25,10 +25,13 @@
  #include <linux/of_device.h>
  #include <linux/mfd/syscon.h>
  #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include <linux/libata.h>
  #include "ahci.h"
  
  enum {
-       HOST_TIMER1MS = 0xe0, /* Timer 1-ms */
+       PORT_PHY_CTL = 0x178,                   /* Port0 PHY Control */
+       PORT_PHY_CTL_PDDQ_LOC = 0x100000,       /* PORT_PHY_CTL bits */
+       HOST_TIMER1MS = 0xe0,                   /* Timer 1-ms */
  };
  
  struct imx_ahci_priv {
@@ -36,6 +39,56 @@ struct imx_ahci_priv {
         struct clk *sata_ref_clk;
         struct clk *ahb_clk;
         struct regmap *gpr;
+       bool no_device;
+       bool first_time;
+};
+
+static int ahci_imx_hotplug;   /* nonzero: never power down an empty port */
+module_param_named(hotplug, ahci_imx_hotplug, int, 0644);
+MODULE_PARM_DESC(hotplug, "AHCI IMX hot-plug support (0=Don't support, 1=support)");
+
+/* Common AHCI EH, then one-time PHY power-down if no device is enabled. */
+static void ahci_imx_error_handler(struct ata_port *ap)
+{
+       struct ata_device *dev;
+       struct ata_host *host = dev_get_drvdata(ap->dev);
+       struct ahci_host_priv *hpriv = host->private_data;
+       void __iomem *mmio = hpriv->mmio;
+       struct imx_ahci_priv *imxpriv = dev_get_drvdata(ap->dev->parent);
+
+       ahci_error_handler(ap);
+
+       if (!imxpriv->first_time || ahci_imx_hotplug)
+               return;
+
+       imxpriv->first_time = false;
+
+       ata_for_each_dev(dev, &ap->link, ENABLED)
+               return;         /* an enabled device exists: keep the link up */
+       /*
+        * Disable the link to save power.  An imx ahci port can't be
+        * recovered without a full reset once pddq mode is enabled, which
+        * makes it impossible to use as part of libata LPM.
+        */
+       writel(readl(mmio + PORT_PHY_CTL) | PORT_PHY_CTL_PDDQ_LOC,
+              mmio + PORT_PHY_CTL);
+       regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13,
+                       IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+                       0);     /* clear the masked bit */
+       clk_disable_unprepare(imxpriv->sata_ref_clk);
+       imxpriv->no_device = true;
+}
+
+static struct ata_port_operations ahci_imx_ops = {
+       .inherits       = &ahci_platform_ops,
+       .error_handler  = ahci_imx_error_handler,       /* overrides inherited EH */
+};
+
+static const struct ata_port_info ahci_imx_port_info = {
+       .flags          = AHCI_FLAG_COMMON,
+       .pio_mask       = ATA_PIO4,
+       .udma_mask      = ATA_UDMA6,
+       .port_ops       = &ahci_imx_ops,        /* ops with the i.MX error handler */
  };
  
  static int imx6q_sata_init(struct device *dev, void __iomem *mmio)
@@ -117,9 +170,51 @@ static void imx6q_sata_exit(struct device *dev)
         clk_disable_unprepare(imxpriv->sata_ref_clk);
  }
  
+static int imx_ahci_suspend(struct device *dev)
+{
+       struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent);
+
+       /*
+        * Gate the SATA clocks for suspend.  If no_device is set, they were
+        * already gated off when the port was powered down, so skip that.
+        */
+       if (!imxpriv->no_device) {
+               regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13,
+                               IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+                               0);     /* clear the masked bit */
+               clk_disable_unprepare(imxpriv->sata_ref_clk);
+       }
+
+       return 0;
+}
+
+static int imx_ahci_resume(struct device *dev)
+{
+       struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent);
+       int err;
+
+       /* Port was powered down for lack of a device: leave clocks off. */
+       if (imxpriv->no_device)
+               return 0;
+
+       err = clk_prepare_enable(imxpriv->sata_ref_clk);
+       if (err < 0) {
+               dev_err(dev, "pre-enable sata_ref clock err:%d\n", err);
+               return err;
+       }
+       regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13,
+                       IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+                       IMX6Q_GPR13_SATA_MPLL_CLK_EN);
+       usleep_range(1000, 2000);
+
+       return 0;
+}
+
  static struct ahci_platform_data imx6q_sata_pdata = {
         .init = imx6q_sata_init,
         .exit = imx6q_sata_exit,
+       .ata_port_info = &ahci_imx_port_info,
+       .suspend = imx_ahci_suspend,
+       .resume = imx_ahci_resume,
  };
  
  static const struct of_device_id imx_ahci_of_match[] = {
@@ -152,6 +247,8 @@ static int imx_ahci_probe(struct platform_device *pdev)
         ahci_dev = &ahci_pdev->dev;
         ahci_dev->parent = dev;
  
+       imxpriv->no_device = false;
+       imxpriv->first_time = true;
         imxpriv->ahb_clk = devm_clk_get(dev, "ahb");
         if (IS_ERR(imxpriv->ahb_clk)) {
                 dev_err(dev, "can't get ahb clock.\n");
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c

index 7d3b853..f955431 100644 (file)
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -49,10 +49,11 @@ static struct platform_device_id ahci_devtype[] = {
  };
  MODULE_DEVICE_TABLE(platform, ahci_devtype);
  
-static struct ata_port_operations ahci_platform_ops = {
+struct ata_port_operations ahci_platform_ops = {
         .inherits       = &ahci_ops,
         .host_stop      = ahci_host_stop,
  };
+EXPORT_SYMBOL_GPL(ahci_platform_ops);
  
  static struct ata_port_operations ahci_platform_retry_srst_ops = {
         .inherits       = &ahci_pmp_retry_srst_ops,
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c

index 513ad7e..6334c8d 100644 (file)
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -100,7 +100,7 @@
  
  enum {
         PIIX_IOCFG              = 0x54, /* IDE I/O configuration register */
-       ICH5_PMR                = 0x90, /* port mapping register */
+       ICH5_PMR                = 0x90, /* address map register */
         ICH5_PCS                = 0x92, /* port control and status */
         PIIX_SIDPR_BAR          = 5,
         PIIX_SIDPR_LEN          = 16,
@@ -233,7 +233,7 @@ static const struct pci_device_id piix_pci_tbl[] = {
           PCI_CLASS_STORAGE_IDE << 8, 0xffff00, ich6m_sata },
         /* 82801GB/GR/GH (ICH7, identical to ICH6) */
         { 0x8086, 0x27c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata },
-       /* 2801GBM/GHM (ICH7M, identical to ICH6M) */
+       /* 82801GBM/GHM (ICH7M, identical to ICH6M) */
         { 0x8086, 0x27c4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6m_sata },
         /* Enterprise Southbridge 2 (631xESB/632xESB) */
         { 0x8086, 0x2680, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich6_sata },
@@ -517,7 +517,7 @@ static int ich_pata_cable_detect(struct ata_port *ap)
         const struct ich_laptop *lap = &ich_laptop[0];
         u8 mask;
  
-       /* Check for specials - Acer Aspire 5602WLMi */
+       /* Check for specials */
         while (lap->device) {
                 if (lap->device == pdev->device &&
                     lap->subvendor == pdev->subsystem_vendor &&
@@ -1366,38 +1366,39 @@ static const int *piix_init_sata_map(struct pci_dev *pdev,
         const int *map;
         int i, invalid_map = 0;
         u8 map_value;
+       char buf[32];
+       char *p = buf, *end = buf + sizeof(buf);
  
         pci_read_config_byte(pdev, ICH5_PMR, &map_value);
  
         map = map_db->map[map_value & map_db->mask];
  
-       dev_info(&pdev->dev, "MAP [");
         for (i = 0; i < 4; i++) {
                 switch (map[i]) {
                 case RV:
                         invalid_map = 1;
-                       pr_cont(" XX");
+                       p += scnprintf(p, end - p, " XX");
                         break;
  
                 case NA:
-                       pr_cont(" --");
+                       p += scnprintf(p, end - p, " --");
                         break;
  
                 case IDE:
                         WARN_ON((i & 1) || map[i + 1] != IDE);
                         pinfo[i / 2] = piix_port_info[ich_pata_100];
                         i++;
-                       pr_cont(" IDE IDE");
+                       p += scnprintf(p, end - p, " IDE IDE");
                         break;
  
                 default:
-                       pr_cont(" P%d", map[i]);
+                       p += scnprintf(p, end - p, " P%d", map[i]);
                         if (i & 1)
                                 pinfo[i / 2].flags |= ATA_FLAG_SLAVE_POSS;
                         break;
                 }
         }
-       pr_cont(" ]\n");
+       dev_info(&pdev->dev, "MAP [%s ]\n", buf);
  
         if (invalid_map)
                 dev_err(&pdev->dev, "invalid MAP value %u\n", map_value);
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c

index aaac4fb..c482f8c 100644 (file)
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -89,7 +89,6 @@ static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class,
  static int ahci_hardreset(struct ata_link *link, unsigned int *class,
                           unsigned long deadline);
  static void ahci_postreset(struct ata_link *link, unsigned int *class);
-static void ahci_error_handler(struct ata_port *ap);
  static void ahci_post_internal_cmd(struct ata_queued_cmd *qc);
  static void ahci_dev_config(struct ata_device *dev);
  #ifdef CONFIG_PM
@@ -189,14 +188,15 @@ struct ata_port_operations ahci_pmp_retry_srst_ops = {
  };
  EXPORT_SYMBOL_GPL(ahci_pmp_retry_srst_ops);
  
-int ahci_em_messages = 1;
+bool ahci_em_messages __read_mostly = true;    /* non-static: exported below */
  EXPORT_SYMBOL_GPL(ahci_em_messages);
-module_param(ahci_em_messages, int, 0444);
+module_param(ahci_em_messages, bool, 0444);
  /* add other LED protocol types when they become supported */
  MODULE_PARM_DESC(ahci_em_messages,
         "AHCI Enclosure Management Message control (0 = off, 1 = on)");
  
-int devslp_idle_timeout = 1000;        /* device sleep idle timeout in ms */
+/* device sleep idle timeout in ms */
+static int devslp_idle_timeout __read_mostly = 1000;
  module_param(devslp_idle_timeout, int, 0644);
  MODULE_PARM_DESC(devslp_idle_timeout, "device sleep idle timeout");
  
@@ -1275,9 +1275,11 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
  {
         struct ata_port *ap = link->ap;
         struct ahci_host_priv *hpriv = ap->host->private_data;
+       struct ahci_port_priv *pp = ap->private_data;
         const char *reason = NULL;
         unsigned long now, msecs;
         struct ata_taskfile tf;
+       bool fbs_disabled = false;
         int rc;
  
         DPRINTK("ENTER\n");
@@ -1287,6 +1289,16 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
         if (rc && rc != -EOPNOTSUPP)
                 ata_link_warn(link, "failed to reset engine (errno=%d)\n", rc);
  
+       /*
+        * According to AHCI-1.2 9.3.9: if FBS is enabled, software shall
+        * clear PxFBS.EN to '0' prior to issuing a software reset to devices
+        * that are attached to a port multiplier.
+        */
+       if (!ata_is_host_link(link) && pp->fbs_enabled) {
+               ahci_disable_fbs(ap);
+               fbs_disabled = true;
+       }
+
         ata_tf_init(link->device, &tf);
  
         /* issue the first D2H Register FIS */
@@ -1327,6 +1339,10 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
         } else
                 *class = ahci_dev_classify(ap);
  
+       /* re-enable FBS if disabled before */
+       if (fbs_disabled)
+               ahci_enable_fbs(ap);
+
         DPRINTK("EXIT, class=%u\n", *class);
         return 0;
  
@@ -1989,7 +2005,7 @@ static void ahci_thaw(struct ata_port *ap)
         writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
  }
  
-static void ahci_error_handler(struct ata_port *ap)
+void ahci_error_handler(struct ata_port *ap)
  {
         if (!(ap->pflags & ATA_PFLAG_FROZEN)) {
                 /* restart engine */
@@ -2002,6 +2018,7 @@ static void ahci_error_handler(struct ata_port *ap)
         if (!ata_dev_enabled(ap->link.device))
                 ahci_stop_engine(ap);
  }
+EXPORT_SYMBOL_GPL(ahci_error_handler);
  
  static void ahci_post_internal_cmd(struct ata_queued_cmd *qc)
  {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c

index 83b1a9f..81a94a3 100644 (file)
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4126,6 +4126,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
         { "TORiSAN DVD-ROM DRD-N216", NULL,     ATA_HORKAGE_MAX_SEC_128 },
         { "QUANTUM DAT    DAT72-000", NULL,     ATA_HORKAGE_ATAPI_MOD16_DMA },
         { "Slimtype DVD A  DS8A8SH", NULL,      ATA_HORKAGE_MAX_SEC_LBA48 },
+       { "Slimtype DVD A  DS8A9SH", NULL,      ATA_HORKAGE_MAX_SEC_LBA48 },
  
         /* Devices we expect to fail diagnostics */
  
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c

index 370462f..77bbc82 100644 (file)
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2293,6 +2293,7 @@ const char *ata_get_cmd_descript(u8 command)
                 { ATA_CMD_IDLE,                 "IDLE" },
                 { ATA_CMD_EDD,                  "EXECUTE DEVICE DIAGNOSTIC" },
                 { ATA_CMD_DOWNLOAD_MICRO,       "DOWNLOAD MICROCODE" },
+               { ATA_CMD_DOWNLOAD_MICRO_DMA,   "DOWNLOAD MICROCODE DMA" },
                 { ATA_CMD_NOP,                  "NOP" },
                 { ATA_CMD_FLUSH,                "FLUSH CACHE" },
                 { ATA_CMD_FLUSH_EXT,            "FLUSH CACHE EXT" },
@@ -2313,6 +2314,8 @@ const char *ata_get_cmd_descript(u8 command)
                 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
                 { ATA_CMD_FPDMA_READ,           "READ FPDMA QUEUED" },
                 { ATA_CMD_FPDMA_WRITE,          "WRITE FPDMA QUEUED" },
+               { ATA_CMD_FPDMA_SEND,           "SEND FPDMA QUEUED" },
+               { ATA_CMD_FPDMA_RECV,           "RECEIVE FPDMA QUEUED" },
                 { ATA_CMD_PIO_READ,             "READ SECTOR(S)" },
                 { ATA_CMD_PIO_READ_EXT,         "READ SECTOR(S) EXT" },
                 { ATA_CMD_PIO_WRITE,            "WRITE SECTOR(S)" },
@@ -2339,12 +2342,15 @@ const char *ata_get_cmd_descript(u8 command)
                 { ATA_CMD_WRITE_LOG_EXT,        "WRITE LOG EXT" },
                 { ATA_CMD_READ_LOG_DMA_EXT,     "READ LOG DMA EXT" },
                 { ATA_CMD_WRITE_LOG_DMA_EXT,    "WRITE LOG DMA EXT" },
+               { ATA_CMD_TRUSTED_NONDATA,      "TRUSTED NON-DATA" },
                 { ATA_CMD_TRUSTED_RCV,          "TRUSTED RECEIVE" },
                 { ATA_CMD_TRUSTED_RCV_DMA,      "TRUSTED RECEIVE DMA" },
                 { ATA_CMD_TRUSTED_SND,          "TRUSTED SEND" },
                 { ATA_CMD_TRUSTED_SND_DMA,      "TRUSTED SEND DMA" },
                 { ATA_CMD_PMP_READ,             "READ BUFFER" },
+               { ATA_CMD_PMP_READ_DMA,         "READ BUFFER DMA" },
                 { ATA_CMD_PMP_WRITE,            "WRITE BUFFER" },
+               { ATA_CMD_PMP_WRITE_DMA,        "WRITE BUFFER DMA" },
                 { ATA_CMD_CONF_OVERLAY,         "DEVICE CONFIGURATION OVERLAY" },
                 { ATA_CMD_SEC_SET_PASS,         "SECURITY SET PASSWORD" },
                 { ATA_CMD_SEC_UNLOCK,           "SECURITY UNLOCK" },
@@ -2363,6 +2369,8 @@ const char *ata_get_cmd_descript(u8 command)
                 { ATA_CMD_CFA_TRANS_SECT,       "CFA TRANSLATE SECTOR" },
                 { ATA_CMD_CFA_ERASE,            "CFA ERASE SECTORS" },
                 { ATA_CMD_CFA_WRITE_MULT_NE,    "CFA WRITE MULTIPLE WITHOUT ERASE" },
+               { ATA_CMD_REQ_SENSE_DATA,       "REQUEST SENSE DATA EXT" },
+               { ATA_CMD_SANITIZE_DEVICE,      "SANITIZE DEVICE" },
                 { ATA_CMD_READ_LONG,            "READ LONG (with retries)" },
                 { ATA_CMD_READ_LONG_ONCE,       "READ LONG (without retries)" },
                 { ATA_CMD_WRITE_LONG,           "WRITE LONG (with retries)" },
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c

index 150a917..e374132 100644 (file)
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -321,25 +321,25 @@ int ata_tport_add(struct device *parent,
  /*
   * ATA link attributes
   */
+static int noop(int x) { return x; }   /* identity: field used as-is */
  
-
-#define ata_link_show_linkspeed(field)                                 \
+#define ata_link_show_linkspeed(field, format)                         \
  static ssize_t                                                         \
  show_ata_link_##field(struct device *dev,                              \
                       struct device_attribute *attr, char *buf)         \
  {                                                                      \
         struct ata_link *link = transport_class_to_link(dev);           \
                                                                         \
-       return sprintf(buf,"%s\n", sata_spd_string(fls(link->field)));  \
+       return sprintf(buf, "%s\n", sata_spd_string(format(link->field))); \
  }
  
-#define ata_link_linkspeed_attr(field)                                 \
-       ata_link_show_linkspeed(field)                                  \
+#define ata_link_linkspeed_attr(field, format)                         \
+       ata_link_show_linkspeed(field, format)                          \
  static DEVICE_ATTR(field, S_IRUGO, show_ata_link_##field, NULL)
  
-ata_link_linkspeed_attr(hw_sata_spd_limit);
-ata_link_linkspeed_attr(sata_spd_limit);
-ata_link_linkspeed_attr(sata_spd);
+ata_link_linkspeed_attr(hw_sata_spd_limit, fls);
+ata_link_linkspeed_attr(sata_spd_limit, fls);
+ata_link_linkspeed_attr(sata_spd, noop);
  
  
  static DECLARE_TRANSPORT_CLASS(ata_link_class,
diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c

index 7f5e5d9..ea3b3dc 100644 (file)
--- a/drivers/ata/sata_highbank.c
+++ b/drivers/ata/sata_highbank.c
@@ -343,13 +343,11 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr)
  {
         struct device_node *sata_node = dev->of_node;
         int phy_count = 0, phy, port = 0, i;
-       void __iomem *cphy_base[CPHY_PHY_COUNT];
-       struct device_node *phy_nodes[CPHY_PHY_COUNT];
-       u32 tx_atten[CPHY_PORT_COUNT];
+       void __iomem *cphy_base[CPHY_PHY_COUNT] = {};
+       struct device_node *phy_nodes[CPHY_PHY_COUNT] = {};
+       u32 tx_atten[CPHY_PORT_COUNT] = {};
  
         memset(port_data, 0, sizeof(struct phy_lane_info) * CPHY_PORT_COUNT);
-       memset(phy_nodes, 0, sizeof(struct device_node*) * CPHY_PHY_COUNT);
-       memset(tx_atten, 0xff, CPHY_PORT_COUNT);
  
         do {
                 u32 tmp;
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c

index c2d95e9..1dae9a9 100644 (file)
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -792,7 +792,7 @@ static int sata_rcar_probe(struct platform_device *pdev)
                 dev_err(&pdev->dev, "failed to get access to sata clock\n");
                 return PTR_ERR(priv->clk);
         }
-       clk_enable(priv->clk);
+       clk_prepare_enable(priv->clk);
  
         host = ata_host_alloc(&pdev->dev, 1);
         if (!host) {
@@ -822,7 +822,7 @@ static int sata_rcar_probe(struct platform_device *pdev)
                 return 0;
  
  cleanup:
-       clk_disable(priv->clk);
+       clk_disable_unprepare(priv->clk);
  
         return ret;
  }
@@ -841,7 +841,7 @@ static int sata_rcar_remove(struct platform_device *pdev)
         iowrite32(0, base + SATAINTSTAT_REG);
         iowrite32(0x7ff, base + SATAINTMASK_REG);
  
-       clk_disable(priv->clk);
+       clk_disable_unprepare(priv->clk);
  
         return 0;
  }
@@ -861,7 +861,7 @@ static int sata_rcar_suspend(struct device *dev)
                 /* mask */
                 iowrite32(0x7ff, base + SATAINTMASK_REG);
  
-               clk_disable(priv->clk);
+               clk_disable_unprepare(priv->clk);
         }
  
         return ret;
@@ -873,7 +873,7 @@ static int sata_rcar_resume(struct device *dev)
         struct sata_rcar_priv *priv = host->private_data;
         void __iomem *base = priv->base;
  
-       clk_enable(priv->clk);
+       clk_prepare_enable(priv->clk);
  
         /* ack and mask */
         iowrite32(0, base + SATAINTSTAT_REG);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c

index 7413d06..0f38201 100644 (file)
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -216,7 +216,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
                 newattrs.ia_gid = gid;
                 newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
                 mutex_lock(&dentry->d_inode->i_mutex);
-               notify_change(dentry, &newattrs);
+               notify_change(dentry, &newattrs, NULL);
                 mutex_unlock(&dentry->d_inode->i_mutex);
  
                 /* mark as kernel-created inode */
@@ -322,9 +322,9 @@ static int handle_remove(const char *nodename, struct device *dev)
                         newattrs.ia_valid =
                                 ATTR_UID|ATTR_GID|ATTR_MODE;
                         mutex_lock(&dentry->d_inode->i_mutex);
-                       notify_change(dentry, &newattrs);
+                       notify_change(dentry, &newattrs, NULL);
                         mutex_unlock(&dentry->d_inode->i_mutex);
-                       err = vfs_unlink(parent.dentry->d_inode, dentry);
+                       err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
                         if (!err || err == -ENOENT)
                                 deleted = 1;
                 }
diff --git a/drivers/char/misc.c b/drivers/char/misc.c

index 2f685f6..ffa97d2 100644 (file)
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -114,7 +114,7 @@ static int misc_open(struct inode * inode, struct file * file)
         int minor = iminor(inode);
         struct miscdevice *c;
         int err = -ENODEV;
-       const struct file_operations *old_fops, *new_fops = NULL;
+       const struct file_operations *new_fops = NULL;
  
         mutex_lock(&misc_mtx);
         
@@ -141,17 +141,11 @@ static int misc_open(struct inode * inode, struct file * file)
         }
  
         err = 0;
-       old_fops = file->f_op;
-       file->f_op = new_fops;
+       replace_fops(file, new_fops);
         if (file->f_op->open) {
                 file->private_data = c;
-               err=file->f_op->open(inode,file);
-               if (err) {
-                       fops_put(file->f_op);
-                       file->f_op = fops_get(old_fops);
-               }
+               err = file->f_op->open(inode,file);
         }
-       fops_put(old_fops);
  fail:
         mutex_unlock(&misc_mtx);
         return err;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c

index 3f84277..22d14ec 100644 (file)
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -148,7 +148,7 @@ int drm_stub_open(struct inode *inode, struct file *filp)
         struct drm_minor *minor;
         int minor_id = iminor(inode);
         int err = -ENODEV;
-       const struct file_operations *old_fops;
+       const struct file_operations *new_fops;
  
         DRM_DEBUG("\n");
  
@@ -163,18 +163,13 @@ int drm_stub_open(struct inode *inode, struct file *filp)
         if (drm_device_is_unplugged(dev))
                 goto out;
  
-       old_fops = filp->f_op;
-       filp->f_op = fops_get(dev->driver->fops);
-       if (filp->f_op == NULL) {
-               filp->f_op = old_fops;
+       new_fops = fops_get(dev->driver->fops);
+       if (!new_fops)
                 goto out;
-       }
-       if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) {
-               fops_put(filp->f_op);
-               filp->f_op = fops_get(old_fops);
-       }
-       fops_put(old_fops);
  
+       replace_fops(filp, new_fops);
+       if (filp->f_op->open)
+               err = filp->f_op->open(inode, filp);
  out:
         mutex_unlock(&drm_global_mutex);
         return err;
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c

index 0b4616b..c0363f1 100644 (file)
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -206,8 +206,6 @@ static int dvb_dvr_release(struct inode *inode, struct file *file)
         /* TODO */
         dvbdev->users--;
         if (dvbdev->users == 1 && dmxdev->exit == 1) {
-               fops_put(file->f_op);
-               file->f_op = NULL;
                 mutex_unlock(&dmxdev->mutex);
                 wake_up(&dvbdev->wait_queue);
         } else
@@ -1120,8 +1118,6 @@ static int dvb_demux_release(struct inode *inode, struct file *file)
         mutex_lock(&dmxdev->mutex);
         dmxdev->dvbdev->users--;
         if(dmxdev->dvbdev->users==1 && dmxdev->exit==1) {
-               fops_put(file->f_op);
-               file->f_op = NULL;
                 mutex_unlock(&dmxdev->mutex);
                 wake_up(&dmxdev->dvbdev->wait_queue);
         } else
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c

index 401ef64..983db75 100644 (file)
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -74,22 +74,15 @@ static int dvb_device_open(struct inode *inode, struct file *file)
  
         if (dvbdev && dvbdev->fops) {
                 int err = 0;
-               const struct file_operations *old_fops;
+               const struct file_operations *new_fops;
  
-               file->private_data = dvbdev;
-               old_fops = file->f_op;
-               file->f_op = fops_get(dvbdev->fops);
-               if (file->f_op == NULL) {
-                       file->f_op = old_fops;
+               new_fops = fops_get(dvbdev->fops);
+               if (!new_fops)
                         goto fail;
-               }
-               if(file->f_op->open)
+               file->private_data = dvbdev;
+               replace_fops(file, new_fops);
+               if (file->f_op->open)
                         err = file->f_op->open(inode,file);
-               if (err) {
-                       fops_put(file->f_op);
-                       file->f_op = fops_get(old_fops);
-               }
-               fops_put(old_fops);
                 up_read(&minor_rwsem);
                 mutex_unlock(&dvbdev_mutex);
                 return err;
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c

index bdc1d15..d1c7de9 100644 (file)
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -575,7 +575,7 @@ static int alloc_device(struct nandsim *ns)
                 cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
                 if (IS_ERR(cfile))
                         return PTR_ERR(cfile);
-               if (!cfile->f_op || (!cfile->f_op->read && !cfile->f_op->aio_read)) {
+               if (!cfile->f_op->read && !cfile->f_op->aio_read) {
                         NS_ERR("alloc_device: cache file not readable\n");
                         err = -EINVAL;
                         goto err_close;
diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c

index c071d41..33bb1f2 100644 (file)
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -900,10 +900,9 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
                  * number.
                  */
                 image_seq = be32_to_cpu(ech->image_seq);
-               if (!ubi->image_seq && image_seq)
+               if (!ubi->image_seq)
                         ubi->image_seq = image_seq;
-               if (ubi->image_seq && image_seq &&
-                   ubi->image_seq != image_seq) {
+               if (image_seq && ubi->image_seq != image_seq) {
                         ubi_err("bad image sequence number %d in PEB %d, expected %d",
                                 image_seq, pnum, ubi->image_seq);
                         ubi_dump_ec_hdr(ech);
@@ -1417,9 +1416,11 @@ int ubi_attach(struct ubi_device *ubi, int force_scan)
                                 ai = alloc_ai("ubi_aeb_slab_cache2");
                                 if (!ai)
                                         return -ENOMEM;
-                       }
  
-                       err = scan_all(ubi, ai, UBI_FM_MAX_START);
+                               err = scan_all(ubi, ai, 0);
+                       } else {
+                               err = scan_all(ubi, ai, UBI_FM_MAX_START);
+                       }
                 }
         }
  #else
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c

index f5aa4b0..ead8613 100644 (file)
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -407,6 +407,7 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai,
          */
         for (i = 0; i < pool_size; i++) {
                 int scrub = 0;
+               int image_seq;
  
                 pnum = be32_to_cpu(pebs[i]);
  
@@ -425,10 +426,16 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai,
                 } else if (ret == UBI_IO_BITFLIPS)
                         scrub = 1;
  
-               if (be32_to_cpu(ech->image_seq) != ubi->image_seq) {
+               /*
+                * Older UBI implementations have image_seq set to zero, so
+                * we shouldn't fail if image_seq == 0.
+                */
+               image_seq = be32_to_cpu(ech->image_seq);
+
+               if (image_seq && (image_seq != ubi->image_seq)) {
                         ubi_err("bad image seq: 0x%x, expected: 0x%x",
                                 be32_to_cpu(ech->image_seq), ubi->image_seq);
-                       err = UBI_BAD_FASTMAP;
+                       ret = UBI_BAD_FASTMAP;
                         goto out;
                 }
  
@@ -819,6 +826,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
         list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list)
                 list_move_tail(&tmp_aeb->u.list, &ai->free);
  
+       ubi_assert(list_empty(&used));
+       ubi_assert(list_empty(&eba_orphans));
+       ubi_assert(list_empty(&free));
+
         /*
          * If fastmap is leaking PEBs (must not happen), raise a
          * fat warning and fall back to scanning mode.
@@ -834,6 +845,19 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
  fail_bad:
         ret = UBI_BAD_FASTMAP;
  fail:
+       list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &used, u.list) {
+               list_del(&tmp_aeb->u.list);     /* unlink before freeing the entry */
+               kmem_cache_free(ai->aeb_slab_cache, tmp_aeb);
+       }
+       list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &eba_orphans, u.list) {
+               list_del(&tmp_aeb->u.list);
+               kmem_cache_free(ai->aeb_slab_cache, tmp_aeb);
+       }
+       list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) {
+               list_del(&tmp_aeb->u.list);
+               kmem_cache_free(ai->aeb_slab_cache, tmp_aeb);
+       }
+
         return ret;
  }
  
@@ -923,6 +947,8 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
         }
  
         for (i = 0; i < used_blocks; i++) {
+               int image_seq;
+
                 pnum = be32_to_cpu(fmsb->block_loc[i]);
  
                 if (ubi_io_is_bad(ubi, pnum)) {
@@ -940,10 +966,17 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
                 } else if (ret == UBI_IO_BITFLIPS)
                         fm->to_be_tortured[i] = 1;
  
+               image_seq = be32_to_cpu(ech->image_seq);
                 if (!ubi->image_seq)
-                       ubi->image_seq = be32_to_cpu(ech->image_seq);
+                       ubi->image_seq = image_seq;
  
-               if (be32_to_cpu(ech->image_seq) != ubi->image_seq) {
+               /*
+                * Older UBI implementations have image_seq set to zero, so
+                * we shouldn't fail if image_seq == 0.
+                */
+               if (image_seq && (image_seq != ubi->image_seq)) {
+                       ubi_err("wrong image seq:%d instead of %d",
+                               be32_to_cpu(ech->image_seq), ubi->image_seq);
                         ret = UBI_BAD_FASTMAP;
                         goto free_hdr;
                 }
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c

index c95bfb1..02317c1 100644 (file)
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -599,10 +599,6 @@ static void refill_wl_user_pool(struct ubi_device *ubi)
         return_unused_pool_pebs(ubi, pool);
  
         for (pool->size = 0; pool->size < pool->max_size; pool->size++) {
-               if (!ubi->free.rb_node ||
-                  (ubi->free_count - ubi->beb_rsvd_pebs < 1))
-                       break;
-
                 pool->pebs[pool->size] = __wl_get_peb(ubi);
                 if (pool->pebs[pool->size] < 0)
                         break;
diff --git a/drivers/staging/comedi/comedi_compat32.c b/drivers/staging/comedi/comedi_compat32.c

index 2dfb06a..1e9da40 100644 (file)
--- a/drivers/staging/comedi/comedi_compat32.c
+++ b/drivers/staging/comedi/comedi_compat32.c
@@ -86,9 +86,6 @@ struct comedi32_insnlist_struct {
  static int translated_ioctl(struct file *file, unsigned int cmd,
                             unsigned long arg)
  {
-       if (!file->f_op)
-               return -ENOTTY;
-
         if (file->f_op->unlocked_ioctl)
                 return file->f_op->unlocked_ioctl(file, cmd, arg);
  
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h

index 359c6c1..eefdb8d 100644 (file)
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
  #define ll_vfs_unlink(inode,entry,mnt)   vfs_unlink(inode,entry)
  #define ll_vfs_mknod(dir,entry,mnt,mode,dev)    vfs_mknod(dir,entry,mode,dev)
  #define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
-#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-               vfs_rename(old,old_dir,new,new_dir)
+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \
+               vfs_rename(old,old_dir,new,new_dir,delegated_inode)
  
  #define cfs_bio_io_error(a,b)   bio_io_error((a))
  #define cfs_bio_endio(a,b,c)    bio_endio((a),(c))
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c

index 34815b5..90bbdae 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -83,7 +83,7 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh)
  }
  
  
-/* called from iget5_locked->find_inode() under inode_lock spinlock */
+/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
  static int ll_test_inode(struct inode *inode, void *opaque)
  {
         struct ll_inode_info *lli = ll_i2info(inode);
diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c

index 1ef06fe..09474e7 100644 (file)
--- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
@@ -224,7 +224,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
                 GOTO(put_old, err = PTR_ERR(dchild_new));
  
         err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
-                           dir->d_inode, dchild_new, mnt);
+                           dir->d_inode, dchild_new, mnt, NULL);
  
         dput(dchild_new);
  put_old:
diff --git a/drivers/staging/rtl8188eu/include/osdep_service.h b/drivers/staging/rtl8188eu/include/osdep_service.h

index 44f24fa..36523ed 100644 (file)
--- a/drivers/staging/rtl8188eu/include/osdep_service.h
+++ b/drivers/staging/rtl8188eu/include/osdep_service.h
@@ -430,11 +430,6 @@ int ATOMIC_SUB_RETURN(ATOMIC_T *v, int i);
  int ATOMIC_INC_RETURN(ATOMIC_T *v);
  int ATOMIC_DEC_RETURN(ATOMIC_T *v);
  
-/* File operation APIs, just for linux now */
-int rtw_is_file_readable(char *path);
-int rtw_retrive_from_file(char *path, u8 __user *buf, u32 sz);
-int rtw_store_to_file(char *path, u8 __user *buf, u32 sz);
-
  struct rtw_netdev_priv_indicator {
         void *priv;
         u32 sizeof_priv;
diff --git a/drivers/staging/rtl8188eu/os_dep/osdep_service.c b/drivers/staging/rtl8188eu/os_dep/osdep_service.c

index 5a9e9e4..a1ae727 100644 (file)
--- a/drivers/staging/rtl8188eu/os_dep/osdep_service.c
+++ b/drivers/staging/rtl8188eu/os_dep/osdep_service.c
@@ -356,214 +356,6 @@ inline int ATOMIC_DEC_RETURN(ATOMIC_T *v)
         return atomic_dec_return(v);
  }
  
-/* Open a file with the specific @param path, @param flag, @param mode
- * @param fpp the pointer of struct file pointer to get struct file pointer while file opening is success
- * @param path the path of the file to open
- * @param flag file operation flags, please refer to linux document
- * @param mode please refer to linux document
- * @return Linux specific error code
- */
-static int openfile(struct file **fpp, char *path, int flag, int mode)
-{
-       struct file *fp;
-
-       fp = filp_open(path, flag, mode);
-       if (IS_ERR(fp)) {
-               *fpp = NULL;
-               return PTR_ERR(fp);
-       } else {
-               *fpp = fp;
-               return 0;
-       }
-}
-
-/* Close the file with the specific @param fp
- * @param fp the pointer of struct file to close
- * @return always 0
- */
-static int closefile(struct file *fp)
-{
-       filp_close(fp, NULL);
-       return 0;
-}
-
-static int readfile(struct file *fp, char __user *buf, int len)
-{
-       int rlen = 0, sum = 0;
-
-       if (!fp->f_op || !fp->f_op->read)
-               return -EPERM;
-
-       while (sum < len) {
-               rlen = fp->f_op->read(fp, buf+sum, len-sum, &fp->f_pos);
-               if (rlen > 0)
-                       sum += rlen;
-               else if (0 != rlen)
-                       return rlen;
-               else
-                       break;
-       }
-       return  sum;
-}
-
-static int writefile(struct file *fp, char __user *buf, int len)
-{
-       int wlen = 0, sum = 0;
-
-       if (!fp->f_op || !fp->f_op->write)
-               return -EPERM;
-
-       while (sum < len) {
-               wlen = fp->f_op->write(fp, buf+sum, len-sum, &fp->f_pos);
-               if (wlen > 0)
-                       sum += wlen;
-               else if (0 != wlen)
-                       return wlen;
-               else
-                       break;
-       }
-       return sum;
-}
-
-/* Test if the specifi @param path is a file and readable
- * @param path the path of the file to test
- * @return Linux specific error code
- */
-static int isfilereadable(char *path)
-{
-       struct file *fp;
-       int ret = 0;
-       mm_segment_t oldfs;
-       char __user buf;
-
-       fp = filp_open(path, O_RDONLY, 0);
-       if (IS_ERR(fp)) {
-               ret = PTR_ERR(fp);
-       } else {
-               oldfs = get_fs(); set_fs(get_ds());
-
-               if (1 != readfile(fp, &buf, 1))
-                       ret = PTR_ERR(fp);
-
-               set_fs(oldfs);
-               filp_close(fp, NULL);
-       }
-       return ret;
-}
-
-/* Open the file with @param path and retrive the file content into
- * memory starting from @param buf for @param sz at most
- * @param path the path of the file to open and read
- * @param buf the starting address of the buffer to store file content
- * @param sz how many bytes to read at most
- * @return the byte we've read, or Linux specific error code
- */
-static int retrievefromfile(char *path, u8 __user *buf, u32 sz)
-{
-       int ret = -1;
-       mm_segment_t oldfs;
-       struct file *fp;
-
-       if (path && buf) {
-               ret = openfile(&fp, path, O_RDONLY, 0);
-               if (0 == ret) {
-                       DBG_88E("%s openfile path:%s fp =%p\n", __func__,
-                               path, fp);
-
-                       oldfs = get_fs(); set_fs(get_ds());
-                       ret = readfile(fp, buf, sz);
-                       set_fs(oldfs);
-                       closefile(fp);
-
-                       DBG_88E("%s readfile, ret:%d\n", __func__, ret);
-
-               } else {
-                       DBG_88E("%s openfile path:%s Fail, ret:%d\n", __func__,
-                               path, ret);
-               }
-       } else {
-               DBG_88E("%s NULL pointer\n", __func__);
-               ret =  -EINVAL;
-       }
-       return ret;
-}
-
-/*
-* Open the file with @param path and wirte @param sz byte of data starting from @param buf into the file
-* @param path the path of the file to open and write
-* @param buf the starting address of the data to write into file
-* @param sz how many bytes to write at most
-* @return the byte we've written, or Linux specific error code
-*/
-static int storetofile(char *path, u8 __user *buf, u32 sz)
-{
-       int ret = 0;
-       mm_segment_t oldfs;
-       struct file *fp;
-
-       if (path && buf) {
-               ret = openfile(&fp, path, O_CREAT|O_WRONLY, 0666);
-               if (0 == ret) {
-                       DBG_88E("%s openfile path:%s fp =%p\n", __func__, path, fp);
-
-                       oldfs = get_fs(); set_fs(get_ds());
-                       ret = writefile(fp, buf, sz);
-                       set_fs(oldfs);
-                       closefile(fp);
-
-                       DBG_88E("%s writefile, ret:%d\n", __func__, ret);
-
-               } else {
-                       DBG_88E("%s openfile path:%s Fail, ret:%d\n", __func__, path, ret);
-               }
-       } else {
-               DBG_88E("%s NULL pointer\n", __func__);
-               ret =  -EINVAL;
-       }
-       return ret;
-}
-
-/*
-* Test if the specifi @param path is a file and readable
-* @param path the path of the file to test
-* @return true or false
-*/
-int rtw_is_file_readable(char *path)
-{
-       if (isfilereadable(path) == 0)
-               return true;
-       else
-               return false;
-}
-
-/*
-* Open the file with @param path and retrive the file content into memory starting from @param buf for @param sz at most
-* @param path the path of the file to open and read
-* @param buf the starting address of the buffer to store file content
-* @param sz how many bytes to read at most
-* @return the byte we've read
-*/
-int rtw_retrive_from_file(char *path, u8 __user *buf, u32 sz)
-{
-       int ret = retrievefromfile(path, buf, sz);
-
-       return ret >= 0 ? ret : 0;
-}
-
-/*
- * Open the file with @param path and wirte @param sz byte of data
- * starting from @param buf into the file
- * @param path the path of the file to open and write
- * @param buf the starting address of the data to write into file
- * @param sz how many bytes to write at most
- * @return the byte we've written
- */
-int rtw_store_to_file(char *path, u8 __user *buf, u32 sz)
-{
-       int ret = storetofile(path, buf, sz);
-       return ret >= 0 ? ret : 0;
-}
-
  struct net_device *rtw_alloc_etherdev_with_old_priv(int sizeof_priv,
                                                     void *old_priv)
  {
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c

index 3bdfbf8..ea337a7 100644 (file)
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -29,27 +29,19 @@ static DECLARE_RWSEM(minor_rwsem);
  
  static int usb_open(struct inode *inode, struct file *file)
  {
-       int minor = iminor(inode);
-       const struct file_operations *c;
         int err = -ENODEV;
-       const struct file_operations *old_fops, *new_fops = NULL;
+       const struct file_operations *new_fops;
  
         down_read(&minor_rwsem);
-       c = usb_minors[minor];
+       new_fops = fops_get(usb_minors[iminor(inode)]);
  
-       if (!c || !(new_fops = fops_get(c)))
+       if (!new_fops)
                 goto done;
  
-       old_fops = file->f_op;
-       file->f_op = new_fops;
+       replace_fops(file, new_fops);
         /* Curiouser and curiouser... NULL ->open() as "no device" ? */
         if (file->f_op->open)
                 err = file->f_op->open(inode, file);
-       if (err) {
-               fops_put(file->f_op);
-               file->f_op = fops_get(old_fops);
-       }
-       fops_put(old_fops);
   done:
         up_read(&minor_rwsem);
         return err;
diff --git a/fs/9p/cache.h b/fs/9p/cache.h

index 40cc54c..2f96754 100644 (file)
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -101,6 +101,18 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
  
  #else /* CONFIG_9P_FSCACHE */
  
+static inline void v9fs_cache_inode_get_cookie(struct inode *inode)
+{
+}
+
+static inline void v9fs_cache_inode_put_cookie(struct inode *inode)
+{
+}
+
+static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file)
+{
+}
+
  static inline int v9fs_fscache_release_page(struct page *page,
                                             gfp_t gfp) {
         return 1;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c

index aa5ecf4..a0df3e7 100644 (file)
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -105,10 +105,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
                 v9inode->writeback_fid = (void *) fid;
         }
         mutex_unlock(&v9inode->v_mutex);
-#ifdef CONFIG_9P_FSCACHE
         if (v9ses->cache)
                 v9fs_cache_inode_set_cookie(inode, file);
-#endif
         return 0;
  out_error:
         p9_client_clunk(file->private_data);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c

index 94de6d1..4e65aa9 100644 (file)
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,9 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
         clear_inode(inode);
         filemap_fdatawrite(inode->i_mapping);
  
-#ifdef CONFIG_9P_FSCACHE
         v9fs_cache_inode_put_cookie(inode);
-#endif
         /* clunk the fid stashed in writeback_fid */
         if (v9inode->writeback_fid) {
                 p9_client_clunk(v9inode->writeback_fid);
@@ -531,9 +529,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
                 goto error;
  
         v9fs_stat2inode(st, inode, sb);
-#ifdef CONFIG_9P_FSCACHE
         v9fs_cache_inode_get_cookie(inode);
-#endif
         unlock_new_inode(inode);
         return inode;
  error:
@@ -905,10 +901,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
                 goto error;
  
         file->private_data = fid;
-#ifdef CONFIG_9P_FSCACHE
         if (v9ses->cache)
                 v9fs_cache_inode_set_cookie(dentry->d_inode, file);
-#endif
  
         *opened |= FILE_CREATED;
  out:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c

index a7c4814..4c10ede 100644 (file)
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -141,9 +141,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
                 goto error;
  
         v9fs_stat2inode_dotl(st, inode);
-#ifdef CONFIG_9P_FSCACHE
         v9fs_cache_inode_get_cookie(inode);
-#endif
         retval = v9fs_get_acl(inode, fid);
         if (retval)
                 goto error;
@@ -355,10 +353,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
         if (err)
                 goto err_clunk_old_fid;
         file->private_data = ofid;
-#ifdef CONFIG_9P_FSCACHE
         if (v9ses->cache)
                 v9fs_cache_inode_set_cookie(inode, file);
-#endif
         *opened |= FILE_CREATED;
  out:
         v9fs_put_acl(dacl, pacl);
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h

index 585adaf..c770337 100644 (file)
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -43,9 +43,12 @@ struct adfs_dir_ops;
   * ADFS file system superblock data in memory
   */
  struct adfs_sb_info {
-       struct adfs_discmap *s_map;     /* bh list containing map                */
-       struct adfs_dir_ops *s_dir;     /* directory operations                  */
-
+       union { struct {
+               struct adfs_discmap *s_map;     /* bh list containing map        */
+               struct adfs_dir_ops *s_dir;     /* directory operations          */
+               };
+               struct rcu_head rcu;            /* used only at shutdown time    */
+       };
         kuid_t          s_uid;          /* owner uid                             */
         kgid_t          s_gid;          /* owner gid                             */
         umode_t         s_owner_mask;   /* ADFS owner perm -> unix perm          */
diff --git a/fs/adfs/super.c b/fs/adfs/super.c

index 0ff4bae..7b3003c 100644 (file)
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -123,8 +123,7 @@ static void adfs_put_super(struct super_block *sb)
         for (i = 0; i < asb->s_map_size; i++)
                 brelse(asb->s_map[i].dm_bh);
         kfree(asb->s_map);
-       kfree(asb);
-       sb->s_fs_info = NULL;
+       kfree_rcu(asb, rcu);
  }
  
  static int adfs_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/aio.c b/fs/aio.c

index 067e3d3..1f602d9 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,10 +36,10 @@
  #include <linux/eventfd.h>
  #include <linux/blkdev.h>
  #include <linux/compat.h>
-#include <linux/anon_inodes.h>
  #include <linux/migrate.h>
  #include <linux/ramfs.h>
  #include <linux/percpu-refcount.h>
+#include <linux/mount.h>
  
  #include <asm/kmap_types.h>
  #include <asm/uaccess.h>
@@ -152,12 +152,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
  static struct kmem_cache       *kiocb_cachep;
  static struct kmem_cache       *kioctx_cachep;
  
+static struct vfsmount *aio_mnt;
+
+static const struct file_operations aio_ring_fops;
+static const struct address_space_operations aio_ctx_aops;
+
+static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
+{
+       struct qstr this = QSTR_INIT("[aio]", 5);
+       struct file *file;
+       struct path path;
+       struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
+       if (IS_ERR(inode))      /* alloc_anon_inode() fails with ERR_PTR(), not NULL */
+               return ERR_CAST(inode);
+
+       inode->i_mapping->a_ops = &aio_ctx_aops;
+       inode->i_mapping->private_data = ctx;
+       inode->i_size = PAGE_SIZE * nr_pages;
+
+       path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
+       if (!path.dentry) {
+               iput(inode);    /* inode not yet attached to a dentry: drop it here */
+               return ERR_PTR(-ENOMEM);
+       }
+       path.mnt = mntget(aio_mnt);
+
+       d_instantiate(path.dentry, inode);
+       file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
+       if (IS_ERR(file)) {
+               path_put(&path);
+               return file;
+       }
+
+       file->f_flags = O_RDWR;
+       file->private_data = ctx;
+       return file;
+}
+
+static struct dentry *aio_mount(struct file_system_type *fs_type,
+                               int flags, const char *dev_name, void *data)
+{
+       static const struct dentry_operations ops = {
+               .d_dname        = simple_dname,
+       };
+       return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+}
+
  /* aio_setup
   *     Creates the slab caches used by the aio routines, panic on
   *     failure as this is done early during the boot sequence.
   */
  static int __init aio_setup(void)
  {
+       static struct file_system_type aio_fs = {
+               .name           = "aio",
+               .mount          = aio_mount,
+               .kill_sb        = kill_anon_super,
+       };
+       aio_mnt = kern_mount(&aio_fs);
+       if (IS_ERR(aio_mnt))
+               panic("Failed to create aio fs mount.");
+
         kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
         kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
  
@@ -283,16 +338,12 @@ static int aio_setup_ring(struct kioctx *ctx)
         if (nr_pages < 0)
                 return -EINVAL;
  
-       file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
+       file = aio_private_file(ctx, nr_pages);
         if (IS_ERR(file)) {
                 ctx->aio_ring_file = NULL;
                 return -EAGAIN;
         }
  
-       file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
-       file->f_inode->i_mapping->private_data = ctx;
-       file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
-
         for (i = 0; i < nr_pages; i++) {
                 struct page *page;
                 page = find_or_create_page(file->f_inode->i_mapping,
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c

index 85c9618..2408473 100644 (file)
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,7 +24,6 @@
  
  static struct vfsmount *anon_inode_mnt __read_mostly;
  static struct inode *anon_inode_inode;
-static const struct file_operations anon_inode_fops;
  
  /*
   * anon_inodefs_dname() is called from d_path().
@@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
         .d_dname        = anon_inodefs_dname,
  };
  
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
-       return 0;
-};
-
-static const struct address_space_operations anon_aops = {
-       .set_page_dirty = anon_set_page_dirty,
-};
-
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(struct super_block *s)
-{
-       struct inode *inode = new_inode_pseudo(s);
-
-       if (!inode)
-               return ERR_PTR(-ENOMEM);
-
-       inode->i_ino = get_next_ino();
-       inode->i_fop = &anon_inode_fops;
-
-       inode->i_mapping->a_ops = &anon_aops;
-
-       /*
-        * Mark the inode dirty from the very beginning,
-        * that way it will never be moved to the dirty
-        * list because mark_inode_dirty() will think
-        * that it already _is_ on the dirty list.
-        */
-       inode->i_state = I_DIRTY;
-       inode->i_mode = S_IRUSR | S_IWUSR;
-       inode->i_uid = current_fsuid();
-       inode->i_gid = current_fsgid();
-       inode->i_flags |= S_PRIVATE;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       return inode;
-}
-
  static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
                                 int flags, const char *dev_name, void *data)
  {
@@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
                         &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
         if (!IS_ERR(root)) {
                 struct super_block *s = root->d_sb;
-               anon_inode_inode = anon_inode_mkinode(s);
+               anon_inode_inode = alloc_anon_inode(s);
                 if (IS_ERR(anon_inode_inode)) {
                         dput(root);
                         deactivate_locked_super(s);
@@ -108,72 +62,6 @@ static struct file_system_type anon_inode_fs_type = {
         .kill_sb        = kill_anon_super,
  };
  
-/**
- * anon_inode_getfile_private - creates a new file instance by hooking it up to an
- *                      anonymous inode, and a dentry that describe the "class"
- *                      of the file
- *
- * @name:    [in]    name of the "class" of the new file
- * @fops:    [in]    file operations for the new file
- * @priv:    [in]    private data for the new file (will be file's private_data)
- * @flags:   [in]    flags
- *
- *
- * Similar to anon_inode_getfile, but each file holds a single inode.
- *
- */
-struct file *anon_inode_getfile_private(const char *name,
-                                       const struct file_operations *fops,
-                                       void *priv, int flags)
-{
-       struct qstr this;
-       struct path path;
-       struct file *file;
-       struct inode *inode;
-
-       if (fops->owner && !try_module_get(fops->owner))
-               return ERR_PTR(-ENOENT);
-
-       inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
-       if (IS_ERR(inode)) {
-               file = ERR_PTR(-ENOMEM);
-               goto err_module;
-       }
-
-       /*
-        * Link the inode to a directory entry by creating a unique name
-        * using the inode sequence number.
-        */
-       file = ERR_PTR(-ENOMEM);
-       this.name = name;
-       this.len = strlen(name);
-       this.hash = 0;
-       path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
-       if (!path.dentry)
-               goto err_module;
-
-       path.mnt = mntget(anon_inode_mnt);
-
-       d_instantiate(path.dentry, inode);
-
-       file = alloc_file(&path, OPEN_FMODE(flags), fops);
-       if (IS_ERR(file))
-               goto err_dput;
-
-       file->f_mapping = inode->i_mapping;
-       file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
-       file->private_data = priv;
-
-       return file;
-
-err_dput:
-       path_put(&path);
-err_module:
-       module_put(fops->owner);
-       return file;
-}
-EXPORT_SYMBOL_GPL(anon_inode_getfile_private);
-
  /**
   * anon_inode_getfile - creates a new file instance by hooking it up to an
   *                      anonymous inode, and a dentry that describe the "class"
diff --git a/fs/attr.c b/fs/attr.c

index 1449adb..267968d 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -167,7 +167,27 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
  }
  EXPORT_SYMBOL(setattr_copy);
  
-int notify_change(struct dentry * dentry, struct iattr * attr)
+/**
+ * notify_change - modify attributes of a filesystem object
+ * @dentry:    object affected
+ * @attr:      new attributes
+ * @delegated_inode: returns inode, if the inode is delegated
+ *
+ * The caller must hold the i_mutex on the affected object.
+ *
+ * If notify_change discovers a delegation in need of breaking,
+ * it will return -EWOULDBLOCK and return a reference to the inode in
+ * delegated_inode.  The caller should then break the delegation and
+ * retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.  Also, passing NULL is fine for callers holding
+ * the file open for write, as there can be no conflicting delegation in
+ * that case.
+ */
+int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
  {
         struct inode *inode = dentry->d_inode;
         umode_t mode = inode->i_mode;
@@ -241,6 +261,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
                 return 0;
  
         error = security_inode_setattr(dentry, attr);
+       if (error)
+               return error;
+       error = try_break_deleg(inode, delegated_inode);
         if (error)
                 return error;
  
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h

index 3f1128b..4218e26 100644 (file)
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -122,6 +122,7 @@ struct autofs_sb_info {
         spinlock_t lookup_lock;
         struct list_head active_list;
         struct list_head expiring_list;
+       struct rcu_head rcu;
  };
  
  static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -271,7 +272,7 @@ void autofs4_clean_ino(struct autofs_info *);
  
  static inline int autofs_prepare_pipe(struct file *pipe)
  {
-       if (!pipe->f_op || !pipe->f_op->write)
+       if (!pipe->f_op->write)
                 return -EINVAL;
         if (!S_ISFIFO(file_inode(pipe)->i_mode))
                 return -EINVAL;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c

index 0f00da3..1818ce7 100644 (file)
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -658,12 +658,6 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use
                         goto out;
                 }
  
-               if (!fp->f_op) {
-                       err = -ENOTTY;
-                       fput(fp);
-                       goto out;
-               }
-
                 sbi = autofs_dev_ioctl_sbi(fp);
                 if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
                         err = -EINVAL;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c

index b104726..3b9cc9b 100644 (file)
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -56,18 +56,13 @@ void autofs4_kill_sb(struct super_block *sb)
          * just call kill_anon_super when we are called from
          * deactivate_super.
          */
-       if (!sbi)
-               goto out_kill_sb;
-
-       /* Free wait queues, close pipe */
-       autofs4_catatonic_mode(sbi);
-
-       sb->s_fs_info = NULL;
-       kfree(sbi);
+       if (sbi) /* Free wait queues, close pipe */
+               autofs4_catatonic_mode(sbi);
  
-out_kill_sb:
         DPRINTK("shutting down");
         kill_litter_super(sb);
+       if (sbi)
+               kfree_rcu(sbi, rcu);
  }
  
  static int autofs4_show_options(struct seq_file *m, struct dentry *root)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c

index e9c75e2..daa15d6 100644 (file)
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode);
  static int befs_init_inodecache(void);
  static void befs_destroy_inodecache(void);
  static void *befs_follow_link(struct dentry *, struct nameidata *);
-static void befs_put_link(struct dentry *, struct nameidata *, void *);
+static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
  static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
                         char **out, int *out_len);
  static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,10 +79,15 @@ static const struct address_space_operations befs_aops = {
         .bmap           = befs_bmap,
  };
  
+static const struct inode_operations befs_fast_symlink_inode_operations = {
+       .readlink       = generic_readlink,
+       .follow_link    = befs_fast_follow_link,
+};
+
  static const struct inode_operations befs_symlink_inode_operations = {
         .readlink       = generic_readlink,
         .follow_link    = befs_follow_link,
-       .put_link       = befs_put_link,
+       .put_link       = kfree_put_link,
  };
  
  /* 
@@ -411,7 +416,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
                 inode->i_op = &befs_dir_inode_operations;
                 inode->i_fop = &befs_dir_operations;
         } else if (S_ISLNK(inode->i_mode)) {
-               inode->i_op = &befs_symlink_inode_operations;
+               if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
+                       inode->i_op = &befs_symlink_inode_operations;
+               else
+                       inode->i_op = &befs_fast_symlink_inode_operations;
         } else {
                 befs_error(sb, "Inode %lu is not a regular file, "
                            "directory or symlink. THAT IS WRONG! BeFS has no "
@@ -477,47 +485,40 @@ befs_destroy_inodecache(void)
  static void *
  befs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
+       struct super_block *sb = dentry->d_sb;
         befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+       befs_data_stream *data = &befs_ino->i_data.ds;
+       befs_off_t len = data->size;
         char *link;
  
-       if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
-               struct super_block *sb = dentry->d_sb;
-               befs_data_stream *data = &befs_ino->i_data.ds;
-               befs_off_t len = data->size;
+       if (len == 0) {
+               befs_error(sb, "Long symlink with illegal length");
+               link = ERR_PTR(-EIO);
+       } else {
+               befs_debug(sb, "Follow long symlink");
  
-               if (len == 0) {
-                       befs_error(sb, "Long symlink with illegal length");
+               link = kmalloc(len, GFP_NOFS);
+               if (!link) {
+                       link = ERR_PTR(-ENOMEM);
+               } else if (befs_read_lsymlink(sb, data, link, len) != len) {
+                       kfree(link);
+                       befs_error(sb, "Failed to read entire long symlink");
                         link = ERR_PTR(-EIO);
                 } else {
-                       befs_debug(sb, "Follow long symlink");
-
-                       link = kmalloc(len, GFP_NOFS);
-                       if (!link) {
-                               link = ERR_PTR(-ENOMEM);
-                       } else if (befs_read_lsymlink(sb, data, link, len) != len) {
-                               kfree(link);
-                               befs_error(sb, "Failed to read entire long symlink");
-                               link = ERR_PTR(-EIO);
-                       } else {
-                               link[len - 1] = '\0';
-                       }
+                       link[len - 1] = '\0';
                 }
-       } else {
-               link = befs_ino->i_data.symlink;
         }
-
         nd_set_link(nd, link);
         return NULL;
  }
  
-static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+
+static void *
+befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
-       if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
-               char *link = nd_get_link(nd);
-               if (!IS_ERR(link))
-                       kfree(link);
-       }
+       nd_set_link(nd, befs_ino->i_data.symlink);
+       return NULL;
  }
  
  /*
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c

index 89dec7f..ca0ba15 100644 (file)
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -45,7 +45,6 @@ static int load_aout_library(struct file*);
   */
  static int aout_core_dump(struct coredump_params *cprm)
  {
-       struct file *file = cprm->file;
         mm_segment_t fs;
         int has_dumped = 0;
         void __user *dump_start;
@@ -85,10 +84,10 @@ static int aout_core_dump(struct coredump_params *cprm)
  
         set_fs(KERNEL_DS);
  /* struct user */
-       if (!dump_write(file, &dump, sizeof(dump)))
+       if (!dump_emit(cprm, &dump, sizeof(dump)))
                 goto end_coredump;
  /* Now dump all of the user data.  Include malloced stuff as well */
-       if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
+       if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
                 goto end_coredump;
  /* now we start writing out the user space info */
         set_fs(USER_DS);
@@ -96,14 +95,14 @@ static int aout_core_dump(struct coredump_params *cprm)
         if (dump.u_dsize != 0) {
                 dump_start = START_DATA(dump);
                 dump_size = dump.u_dsize << PAGE_SHIFT;
-               if (!dump_write(file, dump_start, dump_size))
+               if (!dump_emit(cprm, dump_start, dump_size))
                         goto end_coredump;
         }
  /* Now prepare to dump the stack area */
         if (dump.u_ssize != 0) {
                 dump_start = START_STACK(dump);
                 dump_size = dump.u_ssize << PAGE_SHIFT;
-               if (!dump_write(file, dump_start, dump_size))
+               if (!dump_emit(cprm, dump_start, dump_size))
                         goto end_coredump;
         }
  end_coredump:
@@ -221,7 +220,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
          * Requires a mmap handler. This prevents people from using a.out
          * as part of an exploit attack against /proc-related vulnerabilities.
          */
-       if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+       if (!bprm->file->f_op->mmap)
                 return -ENOEXEC;
  
         fd_offset = N_TXTOFF(ex);
@@ -374,7 +373,7 @@ static int load_aout_library(struct file *file)
          * Requires a mmap handler. This prevents people from using a.out
          * as part of an exploit attack against /proc-related vulnerabilities.
          */
-       if (!file->f_op || !file->f_op->mmap)
+       if (!file->f_op->mmap)
                 goto out;
  
         if (N_FLAGS(ex))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c

index 4c94a79..571a423 100644 (file)
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                 goto out;
         if (!elf_check_arch(interp_elf_ex))
                 goto out;
-       if (!interpreter->f_op || !interpreter->f_op->mmap)
+       if (!interpreter->f_op->mmap)
                 goto out;
  
         /*
@@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                 goto out;
         if (!elf_check_arch(&loc->elf_ex))
                 goto out;
-       if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+       if (!bprm->file->f_op->mmap)
                 goto out;
  
         /* Now read in all of the header information */
@@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file)
  
         /* First of all, some simple consistency checks */
         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
-           !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
+           !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                 goto out;
  
         /* Now read in all of the header information */
@@ -1225,35 +1225,17 @@ static int notesize(struct memelfnote *en)
         return sz;
  }
  
-#define DUMP_WRITE(addr, nr, foffset)  \
-       do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
-{
-       static const char buf[4] = { 0, };
-       DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
-       return 1;
-}
-
-static int writenote(struct memelfnote *men, struct file *file,
-                       loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
  {
         struct elf_note en;
         en.n_namesz = strlen(men->name) + 1;
         en.n_descsz = men->datasz;
         en.n_type = men->type;
  
-       DUMP_WRITE(&en, sizeof(en), foffset);
-       DUMP_WRITE(men->name, en.n_namesz, foffset);
-       if (!alignfile(file, foffset))
-               return 0;
-       DUMP_WRITE(men->data, men->datasz, foffset);
-       if (!alignfile(file, foffset))
-               return 0;
-
-       return 1;
+       return dump_emit(cprm, &en, sizeof(en)) &&
+           dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+           dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
  }
-#undef DUMP_WRITE
  
  static void fill_elf_header(struct elfhdr *elf, int segs,
                             u16 machine, u32 flags)
@@ -1392,7 +1374,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
  }
  
  static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
-               siginfo_t *siginfo)
+               const siginfo_t *siginfo)
  {
         mm_segment_t old_fs = get_fs();
         set_fs(KERNEL_DS);
@@ -1599,7 +1581,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
  
  static int fill_note_info(struct elfhdr *elf, int phdrs,
                           struct elf_note_info *info,
-                         siginfo_t *siginfo, struct pt_regs *regs)
+                         const siginfo_t *siginfo, struct pt_regs *regs)
  {
         struct task_struct *dump_task = current;
         const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1702,7 +1684,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
   * process-wide notes are interleaved after the first thread-specific note.
   */
  static int write_note_info(struct elf_note_info *info,
-                          struct file *file, loff_t *foffset)
+                          struct coredump_params *cprm)
  {
         bool first = 1;
         struct elf_thread_core_info *t = info->thread;
@@ -1710,22 +1692,22 @@ static int write_note_info(struct elf_note_info *info,
         do {
                 int i;
  
-               if (!writenote(&t->notes[0], file, foffset))
+               if (!writenote(&t->notes[0], cprm))
                         return 0;
  
-               if (first && !writenote(&info->psinfo, file, foffset))
+               if (first && !writenote(&info->psinfo, cprm))
                         return 0;
-               if (first && !writenote(&info->signote, file, foffset))
+               if (first && !writenote(&info->signote, cprm))
                         return 0;
-               if (first && !writenote(&info->auxv, file, foffset))
+               if (first && !writenote(&info->auxv, cprm))
                         return 0;
                 if (first && info->files.data &&
-                               !writenote(&info->files, file, foffset))
+                               !writenote(&info->files, cprm))
                         return 0;
  
                 for (i = 1; i < info->thread_notes; ++i)
                         if (t->notes[i].data &&
-                           !writenote(&t->notes[i], file, foffset))
+                           !writenote(&t->notes[i], cprm))
                                 return 0;
  
                 first = 0;
@@ -1848,34 +1830,31 @@ static int elf_note_info_init(struct elf_note_info *info)
  
  static int fill_note_info(struct elfhdr *elf, int phdrs,
                           struct elf_note_info *info,
-                         siginfo_t *siginfo, struct pt_regs *regs)
+                         const siginfo_t *siginfo, struct pt_regs *regs)
  {
         struct list_head *t;
+       struct core_thread *ct;
+       struct elf_thread_status *ets;
  
         if (!elf_note_info_init(info))
                 return 0;
  
-       if (siginfo->si_signo) {
-               struct core_thread *ct;
-               struct elf_thread_status *ets;
-
-               for (ct = current->mm->core_state->dumper.next;
-                                               ct; ct = ct->next) {
-                       ets = kzalloc(sizeof(*ets), GFP_KERNEL);
-                       if (!ets)
-                               return 0;
+       for (ct = current->mm->core_state->dumper.next;
+                                       ct; ct = ct->next) {
+               ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+               if (!ets)
+                       return 0;
  
-                       ets->thread = ct->task;
-                       list_add(&ets->list, &info->thread_list);
-               }
+               ets->thread = ct->task;
+               list_add(&ets->list, &info->thread_list);
+       }
  
-               list_for_each(t, &info->thread_list) {
-                       int sz;
+       list_for_each(t, &info->thread_list) {
+               int sz;
  
-                       ets = list_entry(t, struct elf_thread_status, list);
-                       sz = elf_dump_thread_status(siginfo->si_signo, ets);
-                       info->thread_status_size += sz;
-               }
+               ets = list_entry(t, struct elf_thread_status, list);
+               sz = elf_dump_thread_status(siginfo->si_signo, ets);
+               info->thread_status_size += sz;
         }
         /* now collect the dump for the current */
         memset(info->prstatus, 0, sizeof(*info->prstatus));
@@ -1935,13 +1914,13 @@ static size_t get_note_info_size(struct elf_note_info *info)
  }
  
  static int write_note_info(struct elf_note_info *info,
-                          struct file *file, loff_t *foffset)
+                          struct coredump_params *cprm)
  {
         int i;
         struct list_head *t;
  
         for (i = 0; i < info->numnote; i++)
-               if (!writenote(info->notes + i, file, foffset))
+               if (!writenote(info->notes + i, cprm))
                         return 0;
  
         /* write out the thread status notes section */
@@ -1950,7 +1929,7 @@ static int write_note_info(struct elf_note_info *info,
                                 list_entry(t, struct elf_thread_status, list);
  
                 for (i = 0; i < tmp->num_notes; i++)
-                       if (!writenote(&tmp->notes[i], file, foffset))
+                       if (!writenote(&tmp->notes[i], cprm))
                                 return 0;
         }
  
@@ -2046,10 +2025,9 @@ static int elf_core_dump(struct coredump_params *cprm)
         int has_dumped = 0;
         mm_segment_t fs;
         int segs;
-       size_t size = 0;
         struct vm_area_struct *vma, *gate_vma;
         struct elfhdr *elf = NULL;
-       loff_t offset = 0, dataoff, foffset;
+       loff_t offset = 0, dataoff;
         struct elf_note_info info = { };
         struct elf_phdr *phdr4note = NULL;
         struct elf_shdr *shdr4extnum = NULL;
@@ -2105,7 +2083,6 @@ static int elf_core_dump(struct coredump_params *cprm)
  
         offset += sizeof(*elf);                         /* Elf header */
         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
-       foffset = offset;
  
         /* Write notes phdr entry */
         {
@@ -2136,13 +2113,10 @@ static int elf_core_dump(struct coredump_params *cprm)
  
         offset = dataoff;
  
-       size += sizeof(*elf);
-       if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+       if (!dump_emit(cprm, elf, sizeof(*elf)))
                 goto end_coredump;
  
-       size += sizeof(*phdr4note);
-       if (size > cprm->limit
-           || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+       if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
                 goto end_coredump;
  
         /* Write program headers for segments dump */
@@ -2164,24 +2138,22 @@ static int elf_core_dump(struct coredump_params *cprm)
                         phdr.p_flags |= PF_X;
                 phdr.p_align = ELF_EXEC_PAGESIZE;
  
-               size += sizeof(phdr);
-               if (size > cprm->limit
-                   || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+               if (!dump_emit(cprm, &phdr, sizeof(phdr)))
                         goto end_coredump;
         }
  
-       if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+       if (!elf_core_write_extra_phdrs(cprm, offset))
                 goto end_coredump;
  
         /* write out the notes section */
-       if (!write_note_info(&info, cprm->file, &foffset))
+       if (!write_note_info(&info, cprm))
                 goto end_coredump;
  
-       if (elf_coredump_extra_notes_write(cprm->file, &foffset))
+       if (elf_coredump_extra_notes_write(cprm))
                 goto end_coredump;
  
         /* Align to page */
-       if (!dump_seek(cprm->file, dataoff - foffset))
+       if (!dump_skip(cprm, dataoff - cprm->written))
                 goto end_coredump;
  
         for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2198,26 +2170,21 @@ static int elf_core_dump(struct coredump_params *cprm)
                         page = get_dump_page(addr);
                         if (page) {
                                 void *kaddr = kmap(page);
-                               stop = ((size += PAGE_SIZE) > cprm->limit) ||
-                                       !dump_write(cprm->file, kaddr,
-                                                   PAGE_SIZE);
+                               stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
                                 kunmap(page);
                                 page_cache_release(page);
                         } else
-                               stop = !dump_seek(cprm->file, PAGE_SIZE);
+                               stop = !dump_skip(cprm, PAGE_SIZE);
                         if (stop)
                                 goto end_coredump;
                 }
         }
  
-       if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+       if (!elf_core_write_extra_data(cprm))
                 goto end_coredump;
  
         if (e_phnum == PN_XNUM) {
-               size += sizeof(*shdr4extnum);
-               if (size > cprm->limit
-                   || !dump_write(cprm->file, shdr4extnum,
-                                  sizeof(*shdr4extnum)))
+               if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
                         goto end_coredump;
         }
  
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c

index c166f32..fe2a643 100644 (file)
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
                 return 0;
         if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr))
                 return 0;
-       if (!file->f_op || !file->f_op->mmap)
+       if (!file->f_op->mmap)
                 return 0;
         return 1;
  }
@@ -1267,35 +1267,17 @@ static int notesize(struct memelfnote *en)
  
  /* #define DEBUG */
  
-#define DUMP_WRITE(addr, nr, foffset)  \
-       do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
-{
-       static const char buf[4] = { 0, };
-       DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
-       return 1;
-}
-
-static int writenote(struct memelfnote *men, struct file *file,
-                       loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
  {
         struct elf_note en;
         en.n_namesz = strlen(men->name) + 1;
         en.n_descsz = men->datasz;
         en.n_type = men->type;
  
-       DUMP_WRITE(&en, sizeof(en), foffset);
-       DUMP_WRITE(men->name, en.n_namesz, foffset);
-       if (!alignfile(file, foffset))
-               return 0;
-       DUMP_WRITE(men->data, men->datasz, foffset);
-       if (!alignfile(file, foffset))
-               return 0;
-
-       return 1;
+       return dump_emit(cprm, &en, sizeof(en)) &&
+               dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+               dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
  }
-#undef DUMP_WRITE
  
  static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
  {
@@ -1500,66 +1482,40 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
  /*
   * dump the segments for an MMU process
   */
-#ifdef CONFIG_MMU
-static int elf_fdpic_dump_segments(struct file *file, size_t *size,
-                          unsigned long *limit, unsigned long mm_flags)
+static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
  {
         struct vm_area_struct *vma;
-       int err = 0;
  
         for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
                 unsigned long addr;
  
-               if (!maydump(vma, mm_flags))
+               if (!maydump(vma, cprm->mm_flags))
                         continue;
  
+#ifdef CONFIG_MMU
                 for (addr = vma->vm_start; addr < vma->vm_end;
                                                         addr += PAGE_SIZE) {
+                       bool res;
                         struct page *page = get_dump_page(addr);
                         if (page) {
                                 void *kaddr = kmap(page);
-                               *size += PAGE_SIZE;
-                               if (*size > *limit)
-                                       err = -EFBIG;
-                               else if (!dump_write(file, kaddr, PAGE_SIZE))
-                                       err = -EIO;
+                               res = dump_emit(cprm, kaddr, PAGE_SIZE);
                                 kunmap(page);
                                 page_cache_release(page);
-                       } else if (!dump_seek(file, PAGE_SIZE))
-                               err = -EFBIG;
-                       if (err)
-                               goto out;
+                       } else {
+                               res = dump_skip(cprm, PAGE_SIZE);
+                       }
+                       if (!res)
+                               return false;
                 }
-       }
-out:
-       return err;
-}
-#endif
-
-/*
- * dump the segments for a NOMMU process
- */
-#ifndef CONFIG_MMU
-static int elf_fdpic_dump_segments(struct file *file, size_t *size,
-                          unsigned long *limit, unsigned long mm_flags)
-{
-       struct vm_area_struct *vma;
-
-       for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
-               if (!maydump(vma, mm_flags))
-                       continue;
-
-               if ((*size += PAGE_SIZE) > *limit)
-                       return -EFBIG;
-
-               if (!dump_write(file, (void *) vma->vm_start,
+#else
+               if (!dump_emit(cprm, (void *) vma->vm_start,
                                 vma->vm_end - vma->vm_start))
-                       return -EIO;
+                       return false;
+#endif
         }
-
-       return 0;
+       return true;
  }
-#endif
  
  static size_t elf_core_vma_data_size(unsigned long mm_flags)
  {
@@ -1585,11 +1541,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
         int has_dumped = 0;
         mm_segment_t fs;
         int segs;
-       size_t size = 0;
         int i;
         struct vm_area_struct *vma;
         struct elfhdr *elf = NULL;
-       loff_t offset = 0, dataoff, foffset;
+       loff_t offset = 0, dataoff;
         int numnote;
         struct memelfnote *notes = NULL;
         struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
@@ -1606,6 +1561,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
         struct elf_shdr *shdr4extnum = NULL;
         Elf_Half e_phnum;
         elf_addr_t e_shoff;
+       struct core_thread *ct;
+       struct elf_thread_status *tmp;
  
         /*
          * We no longer stop all VM operations.
@@ -1641,28 +1598,23 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
                 goto cleanup;
  #endif
  
-       if (cprm->siginfo->si_signo) {
-               struct core_thread *ct;
-               struct elf_thread_status *tmp;
-
-               for (ct = current->mm->core_state->dumper.next;
-                                               ct; ct = ct->next) {
-                       tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
-                       if (!tmp)
-                               goto cleanup;
+       for (ct = current->mm->core_state->dumper.next;
+                                       ct; ct = ct->next) {
+               tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+               if (!tmp)
+                       goto cleanup;
  
-                       tmp->thread = ct->task;
-                       list_add(&tmp->list, &thread_list);
-               }
+               tmp->thread = ct->task;
+               list_add(&tmp->list, &thread_list);
+       }
  
-               list_for_each(t, &thread_list) {
-                       struct elf_thread_status *tmp;
-                       int sz;
+       list_for_each(t, &thread_list) {
+               struct elf_thread_status *tmp;
+               int sz;
  
-                       tmp = list_entry(t, struct elf_thread_status, list);
-                       sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
-                       thread_status_size += sz;
-               }
+               tmp = list_entry(t, struct elf_thread_status, list);
+               sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
+               thread_status_size += sz;
         }
  
         /* now collect the dump for the current */
@@ -1720,7 +1672,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
  
         offset += sizeof(*elf);                         /* Elf header */
         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
-       foffset = offset;
  
         /* Write notes phdr entry */
         {
@@ -1755,13 +1706,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
  
         offset = dataoff;
  
-       size += sizeof(*elf);
-       if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+       if (!dump_emit(cprm, elf, sizeof(*elf)))
                 goto end_coredump;
  
-       size += sizeof(*phdr4note);
-       if (size > cprm->limit
-           || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+       if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
                 goto end_coredump;
  
         /* write program headers for segments dump */
@@ -1785,18 +1733,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
                         phdr.p_flags |= PF_X;
                 phdr.p_align = ELF_EXEC_PAGESIZE;
  
-               size += sizeof(phdr);
-               if (size > cprm->limit
-                   || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+               if (!dump_emit(cprm, &phdr, sizeof(phdr)))
                         goto end_coredump;
         }
  
-       if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+       if (!elf_core_write_extra_phdrs(cprm, offset))
                 goto end_coredump;
  
         /* write out the notes section */
         for (i = 0; i < numnote; i++)
-               if (!writenote(notes + i, cprm->file, &foffset))
+               if (!writenote(notes + i, cprm))
                         goto end_coredump;
  
         /* write out the thread status notes section */
@@ -1805,25 +1751,21 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
                                 list_entry(t, struct elf_thread_status, list);
  
                 for (i = 0; i < tmp->num_notes; i++)
-                       if (!writenote(&tmp->notes[i], cprm->file, &foffset))
+                       if (!writenote(&tmp->notes[i], cprm))
                                 goto end_coredump;
         }
  
-       if (!dump_seek(cprm->file, dataoff - foffset))
+       if (!dump_skip(cprm, dataoff - cprm->written))
                 goto end_coredump;
  
-       if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
-                                   cprm->mm_flags) < 0)
+       if (!elf_fdpic_dump_segments(cprm))
                 goto end_coredump;
  
-       if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+       if (!elf_core_write_extra_data(cprm))
                 goto end_coredump;
  
         if (e_phnum == PN_XNUM) {
-               size += sizeof(*shdr4extnum);
-               if (size > cprm->limit
-                   || !dump_write(cprm->file, shdr4extnum,
-                                  sizeof(*shdr4extnum)))
+               if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
                         goto end_coredump;
         }
  
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c

index 037a3e2..f37b08c 100644 (file)
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm)
         /* First of all, some simple consistency checks */
         if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
                 (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
-               (!bprm->file->f_op || !bprm->file->f_op->mmap)) {
+               !bprm->file->f_op->mmap) {
                         return -ENOEXEC;
         }
  
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c

index 00baf14..57e17fe 100644 (file)
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
                 _debug("discard tail %llx", oi_size);
                 newattrs.ia_valid = ATTR_SIZE;
                 newattrs.ia_size = oi_size & PAGE_MASK;
-               ret = notify_change(object->backer, &newattrs);
+               ret = notify_change(object->backer, &newattrs, NULL);
                 if (ret < 0)
                         goto truncate_failed;
         }
  
         newattrs.ia_valid = ATTR_SIZE;
         newattrs.ia_size = ni_size;
-       ret = notify_change(object->backer, &newattrs);
+       ret = notify_change(object->backer, &newattrs, NULL);
  
  truncate_failed:
         mutex_unlock(&object->backer->d_inode->i_mutex);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c

index f4a08d7..ca65f39 100644 (file)
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                 if (ret < 0) {
                         cachefiles_io_error(cache, "Unlink security error");
                 } else {
-                       ret = vfs_unlink(dir->d_inode, rep);
+                       ret = vfs_unlink(dir->d_inode, rep, NULL);
  
                         if (preemptive)
                                 cachefiles_mark_object_buried(cache, rep);
@@ -396,7 +396,7 @@ try_again:
                 cachefiles_io_error(cache, "Rename security error %d", ret);
         } else {
                 ret = vfs_rename(dir->d_inode, rep,
-                                cache->graveyard->d_inode, grave);
+                                cache->graveyard->d_inode, grave, NULL);
                 if (ret != 0 && ret != -ENOMEM)
                         cachefiles_io_error(cache,
                                             "Rename failed with error %d", ret);
diff --git a/fs/char_dev.c b/fs/char_dev.c

index afc2bb6..94b5f60 100644 (file)
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -368,6 +368,7 @@ void cdev_put(struct cdev *p)
   */
  static int chrdev_open(struct inode *inode, struct file *filp)
  {
+       const struct file_operations *fops;
         struct cdev *p;
         struct cdev *new = NULL;
         int ret = 0;
@@ -400,10 +401,11 @@ static int chrdev_open(struct inode *inode, struct file *filp)
                 return ret;
  
         ret = -ENXIO;
-       filp->f_op = fops_get(p->ops);
-       if (!filp->f_op)
+       fops = fops_get(p->ops);
+       if (!fops)
                 goto out_cdev_put;
  
+       replace_fops(filp, fops);
         if (filp->f_op->open) {
                 ret = filp->f_op->open(inode, filp);
                 if (ret)
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h

index 37e4a72..9409fa1 100644 (file)
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -65,5 +65,6 @@ struct cifs_sb_info {
         char   *mountdata; /* options received at mount time or via DFS refs */
         struct backing_dev_info bdi;
         struct delayed_work prune_tlinks;
+       struct rcu_head rcu;
  };
  #endif                         /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c

index 77fc5e1..849f613 100644 (file)
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -862,7 +862,7 @@ const struct inode_operations cifs_file_inode_ops = {
  const struct inode_operations cifs_symlink_inode_ops = {
         .readlink = generic_readlink,
         .follow_link = cifs_follow_link,
-       .put_link = cifs_put_link,
+       .put_link = kfree_put_link,
         .permission = cifs_permission,
         /* BB add the following two eventually */
         /* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h

index 6d0b072..26a754f 100644 (file)
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -115,8 +115,6 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
  
  /* Functions related to symlinks */
  extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
-extern void cifs_put_link(struct dentry *direntry,
-                         struct nameidata *nd, void *);
  extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
                          int buflen);
  extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c

index 62a5514..8813ff7 100644 (file)
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3770,6 +3770,13 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
         return rc;
  }
  
+static void delayed_free(struct rcu_head *p)
+{
+       struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu);
+       unload_nls(sbi->local_nls);
+       kfree(sbi);
+}
+
  void
  cifs_umount(struct cifs_sb_info *cifs_sb)
  {
@@ -3794,8 +3801,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
  
         bdi_destroy(&cifs_sb->bdi);
         kfree(cifs_sb->mountdata);
-       unload_nls(cifs_sb->local_nls);
-       kfree(cifs_sb);
+       call_rcu(&cifs_sb->rcu, delayed_free);
  }
  
  int
diff --git a/fs/cifs/link.c b/fs/cifs/link.c

index 7e36ceb..cc02347 100644 (file)
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -621,10 +621,3 @@ symlink_exit:
         free_xid(xid);
         return rc;
  }
-
-void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
-{
-       char *p = nd_get_link(nd);
-       if (!IS_ERR(p))
-               kfree(p);
-}
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h

index cc0ea9f..e7550cb 100644 (file)
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations;
  int coda_open(struct inode *i, struct file *f);
  int coda_release(struct inode *i, struct file *f);
  int coda_permission(struct inode *inode, int mask);
-int coda_revalidate_inode(struct dentry *);
+int coda_revalidate_inode(struct inode *);
  int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
  int coda_setattr(struct dentry *, struct iattr *);
  
diff --git a/fs/coda/dir.c b/fs/coda/dir.c

index 190effc..5efbb5e 100644 (file)
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -387,9 +387,6 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
         BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
         host_file = cfi->cfi_container;
  
-       if (!host_file->f_op)
-               return -ENOTDIR;
-
         if (host_file->f_op->iterate) {
                 struct inode *host_inode = file_inode(host_file);
                 mutex_lock(&host_inode->i_mutex);
@@ -566,13 +563,12 @@ static int coda_dentry_delete(const struct dentry * dentry)
   * cache manager Venus issues a downcall to the kernel when this 
   * happens 
   */
-int coda_revalidate_inode(struct dentry *dentry)
+int coda_revalidate_inode(struct inode *inode)
  {
         struct coda_vattr attr;
         int error;
         int old_mode;
         ino_t old_ino;
-       struct inode *inode = dentry->d_inode;
         struct coda_inode_info *cii = ITOC(inode);
  
         if (!cii->c_flags)
diff --git a/fs/coda/file.c b/fs/coda/file.c

index 380b798..9e83b77 100644 (file)
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
         BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
         host_file = cfi->cfi_container;
  
-       if (!host_file->f_op || !host_file->f_op->read)
+       if (!host_file->f_op->read)
                 return -EINVAL;
  
         return host_file->f_op->read(host_file, buf, count, ppos);
@@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
         BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
         host_file = cfi->cfi_container;
  
-       if (!host_file->f_op || !host_file->f_op->write)
+       if (!host_file->f_op->write)
                 return -EINVAL;
  
         host_inode = file_inode(host_file);
@@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
         BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
         host_file = cfi->cfi_container;
  
-       if (!host_file->f_op || !host_file->f_op->mmap)
+       if (!host_file->f_op->mmap)
                 return -ENODEV;
  
         coda_inode = file_inode(coda_file);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c

index 4dcc0d8..506de34 100644 (file)
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode)
  
  int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       int err = coda_revalidate_inode(dentry);
+       int err = coda_revalidate_inode(dentry->d_inode);
         if (!err)
                 generic_fillattr(dentry->d_inode, stat);
         return err;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c

index 5d19acf..dc52e13 100644 (file)
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
                 /*FALL THROUGH*/
  
         default:
-               if (f.file->f_op && f.file->f_op->compat_ioctl) {
+               if (f.file->f_op->compat_ioctl) {
                         error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
                         if (error != -ENOIOCTLCMD)
                                 goto out_fput;
                 }
  
-               if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
+               if (!f.file->f_op->unlocked_ioctl)
                         goto do_ioctl;
                 break;
         }
diff --git a/fs/coredump.c b/fs/coredump.c

index 9bdeca1..62406b6 100644 (file)
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
         return err;
  }
  
-void do_coredump(siginfo_t *siginfo)
+void do_coredump(const siginfo_t *siginfo)
  {
         struct core_state core_state;
         struct core_name cn;
@@ -645,7 +645,7 @@ void do_coredump(siginfo_t *siginfo)
                  */
                 if (!uid_eq(inode->i_uid, current_fsuid()))
                         goto close_fail;
-               if (!cprm.file->f_op || !cprm.file->f_op->write)
+               if (!cprm.file->f_op->write)
                         goto close_fail;
                 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
                         goto close_fail;
@@ -685,40 +685,57 @@ fail:
   * do on a core-file: use only these functions to write out all the
   * necessary info.
   */
-int dump_write(struct file *file, const void *addr, int nr)
+int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
  {
-       return !dump_interrupted() &&
-               access_ok(VERIFY_READ, addr, nr) &&
-               file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+       struct file *file = cprm->file;
+       loff_t pos = file->f_pos;
+       ssize_t n;
+       if (cprm->written + nr > cprm->limit)
+               return 0;
+       while (nr) {
+               if (dump_interrupted())
+                       return 0;
+               n = vfs_write(file, addr, nr, &pos);
+               if (n <= 0)
+                       return 0;
+               file->f_pos = pos;
+               cprm->written += n;
+               /* short write: resume after the bytes already emitted */
+               addr = (const char *)addr + n;
+               nr -= n;
+       }
+       return 1;
  }
-EXPORT_SYMBOL(dump_write);
+EXPORT_SYMBOL(dump_emit);
  
-int dump_seek(struct file *file, loff_t off)
+int dump_skip(struct coredump_params *cprm, size_t nr)
  {
-       int ret = 1;
-
+       static char zeroes[PAGE_SIZE];
+       struct file *file = cprm->file;
         if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+               if (cprm->written + nr > cprm->limit)
+                       return 0;
                 if (dump_interrupted() ||
-                   file->f_op->llseek(file, off, SEEK_CUR) < 0)
+                   file->f_op->llseek(file, nr, SEEK_CUR) < 0)
                         return 0;
+               cprm->written += nr;
+               return 1;
         } else {
-               char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
-               if (!buf)
-                       return 0;
-               while (off > 0) {
-                       unsigned long n = off;
-
-                       if (n > PAGE_SIZE)
-                               n = PAGE_SIZE;
-                       if (!dump_write(file, buf, n)) {
-                               ret = 0;
-                               break;
-                       }
-                       off -= n;
+               while (nr > PAGE_SIZE) {
+                       if (!dump_emit(cprm, zeroes, PAGE_SIZE))
+                               return 0;
+                       nr -= PAGE_SIZE;
                 }
-               free_page((unsigned long)buf);
+               return dump_emit(cprm, zeroes, nr);
         }
-       return ret;
  }
-EXPORT_SYMBOL(dump_seek);
+EXPORT_SYMBOL(dump_skip);
+
+int dump_align(struct coredump_params *cprm, int align)
+{
+       unsigned mod = cprm->written & (align - 1);
+       if (align & (align - 1))        /* not a power of two: report failure */
+               return 0;
+       return mod ? dump_skip(cprm, align - mod) : 1; /* 1 == success, like dump_skip() */
+}
+EXPORT_SYMBOL(dump_align);
diff --git a/fs/dcache.c b/fs/dcache.c

index ae6ebb8..1f24cd6 100644 (file)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -343,6 +343,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
         __releases(dentry->d_inode->i_lock)
  {
         struct inode *inode = dentry->d_inode;
+       __d_clear_type(dentry);
         dentry->d_inode = NULL;
         hlist_del_init(&dentry->d_alias);
         dentry_rcuwalk_barrier(dentry);
@@ -483,27 +484,6 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
         return parent;
  }
  
-/*
- * Unhash a dentry without inserting an RCU walk barrier or checking that
- * dentry->d_lock is locked.  The caller must take care of that, if
- * appropriate.
- */
-static void __d_shrink(struct dentry *dentry)
-{
-       if (!d_unhashed(dentry)) {
-               struct hlist_bl_head *b;
-               if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
-                       b = &dentry->d_sb->s_anon;
-               else
-                       b = d_hash(dentry->d_parent, dentry->d_name.hash);
-
-               hlist_bl_lock(b);
-               __hlist_bl_del(&dentry->d_hash);
-               dentry->d_hash.pprev = NULL;
-               hlist_bl_unlock(b);
-       }
-}
-
  /**
   * d_drop - drop a dentry
   * @dentry: dentry to drop
@@ -522,7 +502,21 @@ static void __d_shrink(struct dentry *dentry)
  void __d_drop(struct dentry *dentry)
  {
         if (!d_unhashed(dentry)) {
-               __d_shrink(dentry);
+               struct hlist_bl_head *b;
+               /*
+                * Hashed dentries are normally on the dentry hashtable,
+                * with the exception of those newly allocated by
+                * d_obtain_alias, which are always IS_ROOT:
+                */
+               if (unlikely(IS_ROOT(dentry)))
+                       b = &dentry->d_sb->s_anon;
+               else
+                       b = d_hash(dentry->d_parent, dentry->d_name.hash);
+
+               hlist_bl_lock(b);
+               __hlist_bl_del(&dentry->d_hash);
+               dentry->d_hash.pprev = NULL;
+               hlist_bl_unlock(b);
                 dentry_rcuwalk_barrier(dentry);
         }
  }
@@ -1075,116 +1069,6 @@ void shrink_dcache_sb(struct super_block *sb)
  }
  EXPORT_SYMBOL(shrink_dcache_sb);
  
-/*
- * destroy a single subtree of dentries for unmount
- * - see the comments on shrink_dcache_for_umount() for a description of the
- *   locking
- */
-static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
-{
-       struct dentry *parent;
-
-       BUG_ON(!IS_ROOT(dentry));
-
-       for (;;) {
-               /* descend to the first leaf in the current subtree */
-               while (!list_empty(&dentry->d_subdirs))
-                       dentry = list_entry(dentry->d_subdirs.next,
-                                           struct dentry, d_u.d_child);
-
-               /* consume the dentries from this leaf up through its parents
-                * until we find one with children or run out altogether */
-               do {
-                       struct inode *inode;
-
-                       /*
-                        * inform the fs that this dentry is about to be
-                        * unhashed and destroyed.
-                        */
-                       if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
-                           !d_unhashed(dentry))
-                               dentry->d_op->d_prune(dentry);
-
-                       dentry_lru_del(dentry);
-                       __d_shrink(dentry);
-
-                       if (dentry->d_lockref.count != 0) {
-                               printk(KERN_ERR
-                                      "BUG: Dentry %p{i=%lx,n=%s}"
-                                      " still in use (%d)"
-                                      " [unmount of %s %s]\n",
-                                      dentry,
-                                      dentry->d_inode ?
-                                      dentry->d_inode->i_ino : 0UL,
-                                      dentry->d_name.name,
-                                      dentry->d_lockref.count,
-                                      dentry->d_sb->s_type->name,
-                                      dentry->d_sb->s_id);
-                               BUG();
-                       }
-
-                       if (IS_ROOT(dentry)) {
-                               parent = NULL;
-                               list_del(&dentry->d_u.d_child);
-                       } else {
-                               parent = dentry->d_parent;
-                               parent->d_lockref.count--;
-                               list_del(&dentry->d_u.d_child);
-                       }
-
-                       inode = dentry->d_inode;
-                       if (inode) {
-                               dentry->d_inode = NULL;
-                               hlist_del_init(&dentry->d_alias);
-                               if (dentry->d_op && dentry->d_op->d_iput)
-                                       dentry->d_op->d_iput(dentry, inode);
-                               else
-                                       iput(inode);
-                       }
-
-                       d_free(dentry);
-
-                       /* finished when we fall off the top of the tree,
-                        * otherwise we ascend to the parent and move to the
-                        * next sibling if there is one */
-                       if (!parent)
-                               return;
-                       dentry = parent;
-               } while (list_empty(&dentry->d_subdirs));
-
-               dentry = list_entry(dentry->d_subdirs.next,
-                                   struct dentry, d_u.d_child);
-       }
-}
-
-/*
- * destroy the dentries attached to a superblock on unmounting
- * - we don't need to use dentry->d_lock because:
- *   - the superblock is detached from all mountings and open files, so the
- *     dentry trees will not be rearranged by the VFS
- *   - s_umount is write-locked, so the memory pressure shrinker will ignore
- *     any dentries belonging to this superblock that it comes across
- *   - the filesystem itself is no longer permitted to rearrange the dentries
- *     in this superblock
- */
-void shrink_dcache_for_umount(struct super_block *sb)
-{
-       struct dentry *dentry;
-
-       if (down_read_trylock(&sb->s_umount))
-               BUG();
-
-       dentry = sb->s_root;
-       sb->s_root = NULL;
-       dentry->d_lockref.count--;
-       shrink_dcache_for_umount_subtree(dentry);
-
-       while (!hlist_bl_empty(&sb->s_anon)) {
-               dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
-               shrink_dcache_for_umount_subtree(dentry);
-       }
-}
-
  /*
   * This tries to ascend one level of parenthood, but
   * we can race with renaming, so we need to re-check
@@ -1478,6 +1362,91 @@ void shrink_dcache_parent(struct dentry *parent)
  }
  EXPORT_SYMBOL(shrink_dcache_parent);
  
+static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
+{
+       struct select_data *data = _data;
+       enum d_walk_ret ret = D_WALK_CONTINUE;
+
+       if (dentry->d_lockref.count) {
+               dentry_lru_del(dentry);
+               if (likely(!list_empty(&dentry->d_subdirs)))
+                       goto out;
+               if (dentry == data->start && dentry->d_lockref.count == 1)
+                       goto out;
+               printk(KERN_ERR
+                      "BUG: Dentry %p{i=%lx,n=%s}"
+                      " still in use (%d)"
+                      " [unmount of %s %s]\n",
+                      dentry,
+                      dentry->d_inode ?
+                      dentry->d_inode->i_ino : 0UL,
+                      dentry->d_name.name,
+                      dentry->d_lockref.count,
+                      dentry->d_sb->s_type->name,
+                      dentry->d_sb->s_id);
+               BUG();
+       } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+               /*
+                * We can't use d_lru_shrink_move() because we
+                * need to get the global LRU lock and do the
+                * LRU accounting.
+                */
+               if (dentry->d_flags & DCACHE_LRU_LIST)
+                       d_lru_del(dentry);
+               d_shrink_add(dentry, &data->dispose);
+               data->found++;
+               ret = D_WALK_NORETRY;
+       }
+out:
+       if (data->found && need_resched())
+               ret = D_WALK_QUIT;
+       return ret;
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+       struct dentry *dentry;
+
+       if (down_read_trylock(&sb->s_umount))
+               BUG();
+
+       dentry = sb->s_root;
+       sb->s_root = NULL;
+       for (;;) {
+               struct select_data data;
+
+               INIT_LIST_HEAD(&data.dispose);
+               data.start = dentry;
+               data.found = 0;
+
+               d_walk(dentry, &data, umount_collect, NULL);
+               if (!data.found)
+                       break;
+
+               shrink_dentry_list(&data.dispose);
+               cond_resched();
+       }
+       d_drop(dentry);
+       dput(dentry);
+
+       while (!hlist_bl_empty(&sb->s_anon)) {
+               struct select_data data;
+               dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
+
+               INIT_LIST_HEAD(&data.dispose);
+               data.start = NULL;
+               data.found = 0;
+
+               d_walk(dentry, &data, umount_collect, NULL);
+               if (data.found)
+                       shrink_dentry_list(&data.dispose);
+               cond_resched();
+       }
+}
+
  static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
  {
         struct select_data *data = _data;
@@ -1638,12 +1607,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
  }
  EXPORT_SYMBOL(d_alloc);
  
+/**
+ * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
+ * @sb: the superblock
+ * @name: qstr of the name
+ *
+ * For a filesystem that just pins its dentries in memory and never
+ * performs lookups at all, return an unhashed IS_ROOT dentry.
+ */
  struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
  {
-       struct dentry *dentry = __d_alloc(sb, name);
-       if (dentry)
-               dentry->d_flags |= DCACHE_DISCONNECTED;
-       return dentry;
+       return __d_alloc(sb, name);
  }
  EXPORT_SYMBOL(d_alloc_pseudo);
  
@@ -1685,14 +1659,42 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
  }
  EXPORT_SYMBOL(d_set_d_op);
  
+static unsigned d_flags_for_inode(struct inode *inode)
+{
+       unsigned add_flags = DCACHE_FILE_TYPE;
+
+       if (!inode)
+               return DCACHE_MISS_TYPE;
+
+       if (S_ISDIR(inode->i_mode)) {
+               add_flags = DCACHE_DIRECTORY_TYPE;
+               if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
+                       if (unlikely(!inode->i_op->lookup))
+                               add_flags = DCACHE_AUTODIR_TYPE;
+                       else
+                               inode->i_opflags |= IOP_LOOKUP;
+               }
+       } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
+               if (unlikely(inode->i_op->follow_link))
+                       add_flags = DCACHE_SYMLINK_TYPE;
+               else
+                       inode->i_opflags |= IOP_NOFOLLOW;
+       }
+
+       if (unlikely(IS_AUTOMOUNT(inode)))
+               add_flags |= DCACHE_NEED_AUTOMOUNT;
+       return add_flags;
+}
+
  static void __d_instantiate(struct dentry *dentry, struct inode *inode)
  {
+       unsigned add_flags = d_flags_for_inode(inode);
+
         spin_lock(&dentry->d_lock);
-       if (inode) {
-               if (unlikely(IS_AUTOMOUNT(inode)))
-                       dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+       dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
+       dentry->d_flags |= add_flags;
+       if (inode)
                 hlist_add_head(&dentry->d_alias, &inode->i_dentry);
-       }
         dentry->d_inode = inode;
         dentry_rcuwalk_barrier(dentry);
         spin_unlock(&dentry->d_lock);
@@ -1801,6 +1803,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
  
  EXPORT_SYMBOL(d_instantiate_unique);
  
+/**
+ * d_instantiate_no_diralias - instantiate a non-aliased dentry
+ * @entry: dentry to complete
+ * @inode: inode to attach to this dentry
+ *
+ * Fill in inode information in the entry.  If a directory alias is found, then
+ * return an error (and drop inode).  Together with d_materialise_unique() this
+ * guarantees that a directory inode may never have more than one alias.
+ */
+int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
+{
+       BUG_ON(!hlist_unhashed(&entry->d_alias));
+
+       spin_lock(&inode->i_lock);
+       if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
+               spin_unlock(&inode->i_lock);
+               iput(inode);
+               return -EBUSY;
+       }
+       __d_instantiate(entry, inode);
+       spin_unlock(&inode->i_lock);
+       security_d_instantiate(entry, inode);
+
+       return 0;
+}
+EXPORT_SYMBOL(d_instantiate_no_diralias);
+
  struct dentry *d_make_root(struct inode *root_inode)
  {
         struct dentry *res = NULL;
@@ -1870,6 +1899,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
         static const struct qstr anonstring = QSTR_INIT("/", 1);
         struct dentry *tmp;
         struct dentry *res;
+       unsigned add_flags;
  
         if (!inode)
                 return ERR_PTR(-ESTALE);
@@ -1895,9 +1925,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
         }
  
         /* attach a disconnected dentry */
+       add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
+
         spin_lock(&tmp->d_lock);
         tmp->d_inode = inode;
-       tmp->d_flags |= DCACHE_DISCONNECTED;
+       tmp->d_flags |= add_flags;
         hlist_add_head(&tmp->d_alias, &inode->i_dentry);
         hlist_bl_lock(&tmp->d_sb->s_anon);
         hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
@@ -2725,7 +2757,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
         spin_unlock(&dentry->d_lock);
  
         /* anon->d_lock still locked, returns locked */
-       anon->d_flags &= ~DCACHE_DISCONNECTED;
  }
  
  /**
@@ -2885,23 +2916,28 @@ static int prepend_path(const struct path *path,
         struct vfsmount *vfsmnt = path->mnt;
         struct mount *mnt = real_mount(vfsmnt);
         int error = 0;
-       unsigned seq = 0;
+       unsigned seq, m_seq = 0;
         char *bptr;
         int blen;
  
         rcu_read_lock();
+restart_mnt:
+       read_seqbegin_or_lock(&mount_lock, &m_seq);
+       seq = 0;
  restart:
         bptr = *buffer;
         blen = *buflen;
+       error = 0;
         read_seqbegin_or_lock(&rename_lock, &seq);
         while (dentry != root->dentry || vfsmnt != root->mnt) {
                 struct dentry * parent;
  
                 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+                       struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
                         /* Global root? */
-                       if (mnt_has_parent(mnt)) {
-                               dentry = mnt->mnt_mountpoint;
-                               mnt = mnt->mnt_parent;
+                       if (mnt != parent) {
+                               dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
+                               mnt = parent;
                                 vfsmnt = &mnt->mnt;
                                 continue;
                         }
@@ -2935,6 +2971,11 @@ restart:
                 goto restart;
         }
         done_seqretry(&rename_lock, seq);
+       if (need_seqretry(&mount_lock, m_seq)) {
+               m_seq = 1;
+               goto restart_mnt;
+       }
+       done_seqretry(&mount_lock, m_seq);
  
         if (error >= 0 && bptr == *buffer) {
                 if (--blen < 0)
@@ -2971,9 +3012,7 @@ char *__d_path(const struct path *path,
         int error;
  
         prepend(&res, &buflen, "\0", 1);
-       br_read_lock(&vfsmount_lock);
         error = prepend_path(path, root, &res, &buflen);
-       br_read_unlock(&vfsmount_lock);
  
         if (error < 0)
                 return ERR_PTR(error);
@@ -2990,9 +3029,7 @@ char *d_absolute_path(const struct path *path,
         int error;
  
         prepend(&res, &buflen, "\0", 1);
-       br_read_lock(&vfsmount_lock);
         error = prepend_path(path, &root, &res, &buflen);
-       br_read_unlock(&vfsmount_lock);
  
         if (error > 1)
                 error = -EINVAL;
@@ -3067,9 +3104,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
  
         rcu_read_lock();
         get_fs_root_rcu(current->fs, &root);
-       br_read_lock(&vfsmount_lock);
         error = path_with_deleted(path, &root, &res, &buflen);
-       br_read_unlock(&vfsmount_lock);
         rcu_read_unlock();
  
         if (error < 0)
@@ -3224,7 +3259,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
         get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
  
         error = -ENOENT;
-       br_read_lock(&vfsmount_lock);
         if (!d_unlinked(pwd.dentry)) {
                 unsigned long len;
                 char *cwd = page + PATH_MAX;
@@ -3232,7 +3266,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
  
                 prepend(&cwd, &buflen, "\0", 1);
                 error = prepend_path(&pwd, &root, &cwd, &buflen);
-               br_read_unlock(&vfsmount_lock);
                 rcu_read_unlock();
  
                 if (error < 0)
@@ -3253,7 +3286,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                                 error = -EFAULT;
                 }
         } else {
-               br_read_unlock(&vfsmount_lock);
                 rcu_read_unlock();
         }
  
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c

index 88556dc..d5abafd 100644 (file)
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -706,9 +706,7 @@ static int lkb_idr_is_local(int id, void *p, void *data)
  {
         struct dlm_lkb *lkb = p;
  
-       if (!lkb->lkb_nodeid)
-               return 1;
-       return 0;
+       return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
  }
  
  static int lkb_idr_is_any(int id, void *p, void *data)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c

index bf12ba5..4000f6b 100644 (file)
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,15 +44,15 @@
   */
  static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
  {
-       struct dentry *lower_dentry;
-       int rc = 1;
+       struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+       int rc;
+
+       if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
+               return 1;
  
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
-               goto out;
         rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
         if (dentry->d_inode) {
                 struct inode *lower_inode =
@@ -60,12 +60,17 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
  
                 fsstack_copy_attr_all(dentry->d_inode, lower_inode);
         }
-out:
         return rc;
  }
  
  struct kmem_cache *ecryptfs_dentry_info_cache;
  
+static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
+{
+       kmem_cache_free(ecryptfs_dentry_info_cache,
+               container_of(head, struct ecryptfs_dentry_info, rcu));
+}
+
  /**
   * ecryptfs_d_release
   * @dentry: The ecryptfs dentry
@@ -74,15 +79,11 @@ struct kmem_cache *ecryptfs_dentry_info_cache;
   */
  static void ecryptfs_d_release(struct dentry *dentry)
  {
-       if (ecryptfs_dentry_to_private(dentry)) {
-               if (ecryptfs_dentry_to_lower(dentry)) {
-                       dput(ecryptfs_dentry_to_lower(dentry));
-                       mntput(ecryptfs_dentry_to_lower_mnt(dentry));
-               }
-               kmem_cache_free(ecryptfs_dentry_info_cache,
-                               ecryptfs_dentry_to_private(dentry));
+       struct ecryptfs_dentry_info *p = dentry->d_fsdata;
+       if (p) {
+               path_put(&p->lower_path);
+               call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
         }
-       return;
  }
  
  const struct dentry_operations ecryptfs_dops = {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h

index df19d34..90d1882 100644 (file)
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -261,7 +261,10 @@ struct ecryptfs_inode_info {
   * vfsmount too. */
  struct ecryptfs_dentry_info {
         struct path lower_path;
-       struct ecryptfs_crypt_stat *crypt_stat;
+       union {
+               struct ecryptfs_crypt_stat *crypt_stat;
+               struct rcu_head rcu;
+       };
  };
  
  /**
@@ -512,13 +515,6 @@ ecryptfs_dentry_to_lower(struct dentry *dentry)
         return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
  }
  
-static inline void
-ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
-{
-       ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry =
-               lower_dentry;
-}
-
  static inline struct vfsmount *
  ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
  {
@@ -531,13 +527,6 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
         return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
  }
  
-static inline void
-ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
-{
-       ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt =
-               lower_mnt;
-}
-
  #define ecryptfs_printk(type, fmt, arg...) \
          __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
  __printf(1, 2)
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c

index 992cf95..2229a74 100644 (file)
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td)
  {
         struct file *lower_file = ecryptfs_file_to_lower(file);
  
-       if (lower_file->f_op && lower_file->f_op->flush) {
+       if (lower_file->f_op->flush) {
                 filemap_write_and_wait(file->f_mapping);
                 return lower_file->f_op->flush(lower_file, td);
         }
@@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
         struct file *lower_file = NULL;
  
         lower_file = ecryptfs_file_to_lower(file);
-       if (lower_file->f_op && lower_file->f_op->fasync)
+       if (lower_file->f_op->fasync)
                 rc = lower_file->f_op->fasync(fd, lower_file, flag);
         return rc;
  }
@@ -318,7 +318,7 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  
         if (ecryptfs_file_to_private(file))
                 lower_file = ecryptfs_file_to_lower(file);
-       if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
+       if (lower_file->f_op->unlocked_ioctl)
                 rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
         return rc;
  }
@@ -332,7 +332,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  
         if (ecryptfs_file_to_private(file))
                 lower_file = ecryptfs_file_to_lower(file);
-       if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
+       if (lower_file->f_op && lower_file->f_op->compat_ioctl)
                 rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
         return rc;
  }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c

index 67e9b63..c36c448 100644 (file)
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
  
         dget(lower_dentry);
         lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_unlink(lower_dir_inode, lower_dentry);
+       rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
         if (rc) {
                 printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
                 goto out_unlock;
@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
         inode = __ecryptfs_get_inode(lower_dentry->d_inode,
                                      directory_inode->i_sb);
         if (IS_ERR(inode)) {
-               vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
+               vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
                 goto out_lock;
         }
         fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
@@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
         BUG_ON(!d_count(lower_dentry));
  
         ecryptfs_set_dentry_private(dentry, dentry_info);
-       ecryptfs_set_dentry_lower(dentry, lower_dentry);
-       ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
+       dentry_info->lower_path.mnt = lower_mnt;
+       dentry_info->lower_path.dentry = lower_dentry;
  
         if (!lower_dentry->d_inode) {
                 /* We want to add because we couldn't find in lower */
@@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
         dget(lower_new_dentry);
         lower_dir_dentry = lock_parent(lower_new_dentry);
         rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
-                     lower_new_dentry);
+                     lower_new_dentry, NULL);
         if (rc || !lower_new_dentry->d_inode)
                 goto out_lock;
         rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
@@ -640,7 +640,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 goto out_lock;
         }
         rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
-                       lower_new_dir_dentry->d_inode, lower_new_dentry);
+                       lower_new_dir_dentry->d_inode, lower_new_dentry,
+                       NULL);
         if (rc)
                 goto out_lock;
         if (target_inode)
@@ -703,16 +704,6 @@ out:
         return NULL;
  }
  
-static void
-ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
-{
-       char *buf = nd_get_link(nd);
-       if (!IS_ERR(buf)) {
-               /* Free the char* */
-               kfree(buf);
-       }
-}
-
  /**
   * upper_size_to_lower_size
   * @crypt_stat: Crypt_stat associated with file
@@ -891,7 +882,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
                 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
  
                 mutex_lock(&lower_dentry->d_inode->i_mutex);
-               rc = notify_change(lower_dentry, &lower_ia);
+               rc = notify_change(lower_dentry, &lower_ia, NULL);
                 mutex_unlock(&lower_dentry->d_inode->i_mutex);
         }
         return rc;
@@ -992,7 +983,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
                 lower_ia.ia_valid &= ~ATTR_MODE;
  
         mutex_lock(&lower_dentry->d_inode->i_mutex);
-       rc = notify_change(lower_dentry, &lower_ia);
+       rc = notify_change(lower_dentry, &lower_ia, NULL);
         mutex_unlock(&lower_dentry->d_inode->i_mutex);
  out:
         fsstack_copy_attr_all(inode, lower_inode);
@@ -1121,7 +1112,7 @@ out:
  const struct inode_operations ecryptfs_symlink_iops = {
         .readlink = generic_readlink,
         .follow_link = ecryptfs_follow_link,
-       .put_link = ecryptfs_put_link,
+       .put_link = kfree_put_link,
         .permission = ecryptfs_permission,
         .setattr = ecryptfs_setattr,
         .getattr = ecryptfs_getattr_link,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c

index eb1c597..1b119d3 100644 (file)
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,8 +585,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
  
         /* ->kill_sb() will take care of root_info */
         ecryptfs_set_dentry_private(s->s_root, root_info);
-       ecryptfs_set_dentry_lower(s->s_root, path.dentry);
-       ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt);
+       root_info->lower_path = path;
  
         s->s_flags |= MS_ACTIVE;
         return dget(s->s_root);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c

index f7fe7e3..79b65c3 100644 (file)
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1848,7 +1848,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
  
         /* The target file descriptor must support poll */
         error = -EPERM;
-       if (!tf.file->f_op || !tf.file->f_op->poll)
+       if (!tf.file->f_op->poll)
                 goto error_tgt_fput;
  
         /* Check if EPOLLWAKEUP is allowed */
diff --git a/fs/exec.c b/fs/exec.c

index 1212062..977319f 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -106,6 +106,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
   */
  SYSCALL_DEFINE1(uselib, const char __user *, library)
  {
+       struct linux_binfmt *fmt;
         struct file *file;
         struct filename *tmp = getname(library);
         int error = PTR_ERR(tmp);
@@ -136,24 +137,21 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
         fsnotify_open(file);
  
         error = -ENOEXEC;
-       if(file->f_op) {
-               struct linux_binfmt * fmt;
  
-               read_lock(&binfmt_lock);
-               list_for_each_entry(fmt, &formats, lh) {
-                       if (!fmt->load_shlib)
-                               continue;
-                       if (!try_module_get(fmt->module))
-                               continue;
-                       read_unlock(&binfmt_lock);
-                       error = fmt->load_shlib(file);
-                       read_lock(&binfmt_lock);
-                       put_binfmt(fmt);
-                       if (error != -ENOEXEC)
-                               break;
-               }
+       read_lock(&binfmt_lock);
+       list_for_each_entry(fmt, &formats, lh) {
+               if (!fmt->load_shlib)
+                       continue;
+               if (!try_module_get(fmt->module))
+                       continue;
                 read_unlock(&binfmt_lock);
+               error = fmt->load_shlib(file);
+               read_lock(&binfmt_lock);
+               put_binfmt(fmt);
+               if (error != -ENOEXEC)
+                       break;
         }
+       read_unlock(&binfmt_lock);
  exit:
         fput(file);
  out:
@@ -1277,13 +1275,10 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
   */
  int prepare_binprm(struct linux_binprm *bprm)
  {
-       umode_t mode;
-       struct inode * inode = file_inode(bprm->file);
+       struct inode *inode = file_inode(bprm->file);
+       umode_t mode = inode->i_mode;
         int retval;
  
-       mode = inode->i_mode;
-       if (bprm->file->f_op == NULL)
-               return -EACCES;
  
         /* clear any previous set[ug]id data from a previous binary */
         bprm->cred->euid = current_euid();
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c

index a235f00..48a359d 100644 (file)
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -69,145 +69,162 @@ find_acceptable_alias(struct dentry *result,
         return NULL;
  }
  
-/*
- * Find root of a disconnected subtree and return a reference to it.
- */
-static struct dentry *
-find_disconnected_root(struct dentry *dentry)
+/*
+ * Walk up the ancestry of @dentry for as long as DCACHE_DISCONNECTED is
+ * set.  Returns true once a dentry without the flag is reached, and false
+ * if a still-flagged dentry turns out to be its own parent.
+ */
+static bool dentry_connected(struct dentry *dentry)
  {
         dget(dentry);
-       while (!IS_ROOT(dentry)) {
+       while (dentry->d_flags & DCACHE_DISCONNECTED) {
                 struct dentry *parent = dget_parent(dentry);
  
-               if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
+               dput(dentry);
+               /*
+                * Our reference on dentry has just been dropped, so test
+                * for "is its own parent" by comparing pointers rather
+                * than dereferencing dentry through IS_ROOT().
+                */
+               if (dentry == parent) {
                         dput(parent);
-                       break;
+                       return false;
                 }
+               dentry = parent;
+       }
+       dput(dentry);
+       return true;
+}
+
+/*
+ * Clear DCACHE_DISCONNECTED (under d_lock) on @dentry and then on each
+ * ancestor in turn, stopping at the first dentry that does not have the
+ * flag set.
+ */
+static void clear_disconnected(struct dentry *dentry)
+{
+       dget(dentry);
+       while (dentry->d_flags & DCACHE_DISCONNECTED) {
+               struct dentry *parent = dget_parent(dentry);
+
+               /* A flagged dentry that is its own parent is not expected here. */
+               WARN_ON_ONCE(IS_ROOT(dentry));
+
+               spin_lock(&dentry->d_lock);
+               dentry->d_flags &= ~DCACHE_DISCONNECTED;
+               spin_unlock(&dentry->d_lock);
  
                 dput(dentry);
                 dentry = parent;
         }
-       return dentry;
+       dput(dentry);
+}
+
+/*
+ * Reconnect a directory dentry with its parent.
+ *
+ * @mnt supplies the superblock whose export operations are used, @dentry
+ * is the directory to reconnect, and @nbuf is scratch space that receives
+ * the name of @dentry as found in its parent.
+ *
+ * This can return a dentry, or NULL, or an error.
+ *
+ * In the first case the returned dentry is the parent of the given
+ * dentry, and may itself need to be reconnected to its parent.
+ *
+ * In the NULL case, a concurrent VFS operation has either renamed or
+ * removed this directory.  The concurrent operation has reconnected our
+ * dentry, so we no longer need to.
+ *
+ * In the error case an ERR_PTR() is returned: -EACCES when the filesystem
+ * has no ->get_parent, -ESTALE when the dentry could not be connected, or
+ * whatever error get_parent, exportfs_get_name or the lookup produced.
+ */
+static struct dentry *reconnect_one(struct vfsmount *mnt,
+               struct dentry *dentry, char *nbuf)
+{
+       struct dentry *parent;
+       struct dentry *tmp;
+       int err;
+
+       parent = ERR_PTR(-EACCES);
+       mutex_lock(&dentry->d_inode->i_mutex);
+       if (mnt->mnt_sb->s_export_op->get_parent)
+               parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
+       mutex_unlock(&dentry->d_inode->i_mutex);
+
+       if (IS_ERR(parent)) {
+               dprintk("%s: get_parent of %lu failed, err %ld\n",
+                       __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
+               return parent;
+       }
+
+       dprintk("%s: find name of %lu in %lu\n", __func__,
+               dentry->d_inode->i_ino, parent->d_inode->i_ino);
+       err = exportfs_get_name(mnt, parent, nbuf, dentry);
+       if (err == -ENOENT)
+               goto out_reconnected;
+       if (err)
+               goto out_err;
+       dprintk("%s: found name: %s\n", __func__, nbuf);
+       mutex_lock(&parent->d_inode->i_mutex);
+       tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
+       mutex_unlock(&parent->d_inode->i_mutex);
+       if (IS_ERR(tmp)) {
+               /*
+                * err is still 0 at this point; propagate the lookup
+                * error so that out_err does not return ERR_PTR(0),
+                * which the caller would read as "already reconnected".
+                */
+               err = PTR_ERR(tmp);
+               dprintk("%s: lookup failed: %d\n", __func__, err);
+               goto out_err;
+       }
+       if (tmp != dentry) {
+               dput(tmp);
+               goto out_reconnected;
+       }
+       dput(tmp);
+       if (IS_ROOT(dentry)) {
+               err = -ESTALE;
+               goto out_err;
+       }
+       return parent;
+
+out_err:
+       dput(parent);
+       return ERR_PTR(err);
+out_reconnected:
+       dput(parent);
+       /*
+        * Someone must have renamed our entry into another parent, in
+        * which case it has been reconnected by the rename.
+        *
+        * Or someone removed it entirely, in which case filehandle
+        * lookup will succeed but the directory is now IS_DEAD and
+        * subsequent operations on it will fail.
+        *
+        * Alternatively, maybe there was no race at all, and the
+        * filesystem is just corrupt and gave us a parent that doesn't
+        * actually contain any entry pointing to this inode.  So,
+        * double check that this worked and return -ESTALE if not:
+        */
+       if (!dentry_connected(dentry))
+               return ERR_PTR(-ESTALE);
+       return NULL;
  }
  
  /*
   * Make sure target_dir is fully connected to the dentry tree.
   *
- * It may already be, as the flag isn't always updated when connection happens.
+ * On successful return, DCACHE_DISCONNECTED will be cleared on
+ * target_dir, and target_dir->d_parent->...->d_parent will reach the
+ * root of the filesystem.
+ *
+ * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
+ * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
+ * set but already be connected.  In that case we'll verify the
+ * connection to root and then clear the flag.
+ *
+ * Note that target_dir could be removed by a concurrent operation.  In
+ * that case reconnect_path may still succeed with target_dir fully
+ * connected, but further operations using the filehandle will fail when
+ * necessary (due to S_DEAD being set on the directory).
   */
  static int
  reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
  {
-       int noprogress = 0;
-       int err = -ESTALE;
+       struct dentry *dentry, *parent;
  
-       /*
-        * It is possible that a confused file system might not let us complete
-        * the path to the root.  For example, if get_parent returns a directory
-        * in which we cannot find a name for the child.  While this implies a
-        * very sick filesystem we don't want it to cause knfsd to spin.  Hence
-        * the noprogress counter.  If we go through the loop 10 times (2 is
-        * probably enough) without getting anywhere, we just give up
-        */
-       while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
-               struct dentry *pd = find_disconnected_root(target_dir);
-
-               if (!IS_ROOT(pd)) {
-                       /* must have found a connected parent - great */
-                       spin_lock(&pd->d_lock);
-                       pd->d_flags &= ~DCACHE_DISCONNECTED;
-                       spin_unlock(&pd->d_lock);
-                       noprogress = 0;
-               } else if (pd == mnt->mnt_sb->s_root) {
-                       printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n");
-                       spin_lock(&pd->d_lock);
-                       pd->d_flags &= ~DCACHE_DISCONNECTED;
-                       spin_unlock(&pd->d_lock);
-                       noprogress = 0;
-               } else {
-                       /*
-                        * We have hit the top of a disconnected path, try to
-                        * find parent and connect.
-                        *
-                        * Racing with some other process renaming a directory
-                        * isn't much of a problem here.  If someone renames
-                        * the directory, it will end up properly connected,
-                        * which is what we want
-                        *
-                        * Getting the parent can't be supported generically,
-                        * the locking is too icky.
-                        *
-                        * Instead we just return EACCES.  If server reboots
-                        * or inodes get flushed, you lose
-                        */
-                       struct dentry *ppd = ERR_PTR(-EACCES);
-                       struct dentry *npd;
-
-                       mutex_lock(&pd->d_inode->i_mutex);
-                       if (mnt->mnt_sb->s_export_op->get_parent)
-                               ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
-                       mutex_unlock(&pd->d_inode->i_mutex);
-
-                       if (IS_ERR(ppd)) {
-                               err = PTR_ERR(ppd);
-                               dprintk("%s: get_parent of %ld failed, err %d\n",
-                                       __func__, pd->d_inode->i_ino, err);
-                               dput(pd);
-                               break;
-                       }
+       dentry = dget(target_dir);
  
-                       dprintk("%s: find name of %lu in %lu\n", __func__,
-                               pd->d_inode->i_ino, ppd->d_inode->i_ino);
-                       err = exportfs_get_name(mnt, ppd, nbuf, pd);
-                       if (err) {
-                               dput(ppd);
-                               dput(pd);
-                               if (err == -ENOENT)
-                                       /* some race between get_parent and
-                                        * get_name?  just try again
-                                        */
-                                       continue;
-                               break;
-                       }
-                       dprintk("%s: found name: %s\n", __func__, nbuf);
-                       mutex_lock(&ppd->d_inode->i_mutex);
-                       npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
-                       mutex_unlock(&ppd->d_inode->i_mutex);
-                       if (IS_ERR(npd)) {
-                               err = PTR_ERR(npd);
-                               dprintk("%s: lookup failed: %d\n",
-                                       __func__, err);
-                               dput(ppd);
-                               dput(pd);
-                               break;
-                       }
-                       /* we didn't really want npd, we really wanted
-                        * a side-effect of the lookup.
-                        * hopefully, npd == pd, though it isn't really
-                        * a problem if it isn't
-                        */
-                       if (npd == pd)
-                               noprogress = 0;
-                       else
-                               printk("%s: npd != pd\n", __func__);
-                       dput(npd);
-                       dput(ppd);
-                       if (IS_ROOT(pd)) {
-                               /* something went wrong, we have to give up */
-                               dput(pd);
-                               break;
-                       }
-               }
-               dput(pd);
-       }
+       while (dentry->d_flags & DCACHE_DISCONNECTED) {
+               BUG_ON(dentry == mnt->mnt_sb->s_root);
  
-       if (target_dir->d_flags & DCACHE_DISCONNECTED) {
-               /* something went wrong - oh-well */
-               if (!err)
-                       err = -ESTALE;
-               return err;
-       }
+               if (IS_ROOT(dentry))
+                       parent = reconnect_one(mnt, dentry, nbuf);
+               else
+                       parent = dget_parent(dentry);
  
+               if (!parent)
+                       break;
+               dput(dentry);
+               if (IS_ERR(parent))
+                       return PTR_ERR(parent);
+               dentry = parent;
+       }
+       dput(dentry);
+       clear_disconnected(target_dir);
         return 0;
  }
  
@@ -215,7 +232,7 @@ struct getdents_callback {
         struct dir_context ctx;
         char *name;             /* name that was found. It already points to a
                                    buffer NAME_MAX+1 is size */
-       unsigned long ino;      /* the inum we are looking for */
+       u64 ino;                /* the inum we are looking for */
         int found;              /* inode matched? */
         int sequence;           /* sequence counter */
  };
@@ -255,10 +272,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
         struct inode *dir = path->dentry->d_inode;
         int error;
         struct file *file;
+       struct kstat stat;
+       struct path child_path = {
+               .mnt = path->mnt,
+               .dentry = child,
+       };
         struct getdents_callback buffer = {
                 .ctx.actor = filldir_one,
                 .name = name,
-               .ino = child->d_inode->i_ino
         };
  
         error = -ENOTDIR;
@@ -267,6 +288,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
         error = -EINVAL;
         if (!dir->i_fop)
                 goto out;
+       /*
+        * inode->i_ino is unsigned long, kstat->ino is u64, so the
+        * former would be insufficient on 32-bit hosts when the
+        * filesystem supports 64-bit inode numbers.  So we need to
+        * actually call ->getattr, not just read i_ino:
+        */
+       error = vfs_getattr_nosec(&child_path, &stat);
+       if (error)
+               return error;
+       buffer.ino = stat.ino;
         /*
          * Open the directory ...
          */
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c

index c260de6..8a33764 100644 (file)
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode,
         int count = 0;
         ext2_fsblk_t first_block = 0;
  
+       BUG_ON(maxblocks == 0);
+
         depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
  
         if (depth == 0)
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c

index 1c33128..e98171a 100644 (file)
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
         int rc;
  
         memset(&tmp, 0, sizeof(struct buffer_head));
+       tmp.b_size = 1 << inode->i_blkbits;
         rc = ext2_get_block(inode, pgoff, &tmp, create);
         *result = tmp.b_blocknr;
  
diff --git a/fs/ext3/super.c b/fs/ext3/super.c

index c50c761..37fd31e 100644 (file)
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2825,6 +2825,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
                  * bitmap, and an inode table.
                  */
                 overhead += ngroups * (2 + sbi->s_itb_per_group);
+
+               /* Add the journal blocks as well */
+                overhead += sbi->s_journal->j_maxlen;
+
                 sbi->s_overhead_last = overhead;
                 smp_wmb();
                 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index af815ea..d01d623 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2734,8 +2734,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first,
                                             struct inode *second);
  extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
                                           struct inode *donor_inode);
-void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
-void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
  extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
                              __u64 start_orig, __u64 start_donor,
                              __u64 len, __u64 *moved_len);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c

index a569d33..60589b6 100644 (file)
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
  
         /* Protect orig inodes against a truncate and make sure,
          * that only 1 swap_inode_boot_loader is running. */
-       ext4_inode_double_lock(inode, inode_bl);
+       lock_two_nondirectories(inode, inode_bl);
  
         truncate_inode_pages(&inode->i_data, 0);
         truncate_inode_pages(&inode_bl->i_data, 0);
@@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
         ext4_inode_resume_unlocked_dio(inode);
         ext4_inode_resume_unlocked_dio(inode_bl);
  
-       ext4_inode_double_unlock(inode, inode_bl);
+       unlock_two_nondirectories(inode, inode_bl);
  
         iput(inode_bl);
  
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c

index 7fa4d85..773b503 100644 (file)
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1202,42 +1202,6 @@ mext_check_arguments(struct inode *orig_inode,
         return 0;
  }
  
-/**
- * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
- *
- * @inode1:    the inode structure
- * @inode2:    the inode structure
- *
- * Lock two inodes' i_mutex
- */
-void
-ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
-{
-       BUG_ON(inode1 == inode2);
-       if (inode1 < inode2) {
-               mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
-       } else {
-               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
-               mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
-       }
-}
-
-/**
- * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
- *
- * @inode1:     the inode that is released first
- * @inode2:     the inode that is released second
- *
- */
-
-void
-ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
-{
-       mutex_unlock(&inode1->i_mutex);
-       mutex_unlock(&inode2->i_mutex);
-}
-
  /**
   * ext4_move_extents - Exchange the specified range of a file
   *
@@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                 return -EINVAL;
         }
         /* Protect orig and donor inodes against a truncate */
-       ext4_inode_double_lock(orig_inode, donor_inode);
+       lock_two_nondirectories(orig_inode, donor_inode);
  
         /* Wait for all existing dio workers */
         ext4_inode_block_unlocked_dio(orig_inode);
@@ -1535,7 +1499,7 @@ out:
         ext4_double_up_write_data_sem(orig_inode, donor_inode);
         ext4_inode_resume_unlocked_dio(orig_inode);
         ext4_inode_resume_unlocked_dio(donor_inode);
-       ext4_inode_double_unlock(orig_inode, donor_inode);
+       unlock_two_nondirectories(orig_inode, donor_inode);
  
         return ret;
  }
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig

index e06e099..214fe10 100644 (file)
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -63,3 +63,11 @@ config F2FS_FS_SECURITY
           the extended attribute support in advance.
  
           If you are not using a security module, say N.
+
+config F2FS_CHECK_FS
+       bool "F2FS consistency checking feature"
+       depends on F2FS_FS
+       help
+         Enables BUG_ONs which check the file system consistency at runtime.
+
+         If you want to improve the performance, say N.
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c

index b7826ec..d0fc287 100644 (file)
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -205,7 +205,8 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
         return acl;
  }
  
-static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int f2fs_set_acl(struct inode *inode, int type,
+                       struct posix_acl *acl, struct page *ipage)
  {
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
         struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -250,7 +251,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                 }
         }
  
-       error = f2fs_setxattr(inode, name_index, "", value, size, NULL);
+       error = f2fs_setxattr(inode, name_index, "", value, size, ipage);
  
         kfree(value);
         if (!error)
@@ -260,10 +261,10 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
         return error;
  }
  
-int f2fs_init_acl(struct inode *inode, struct inode *dir)
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
  {
-       struct posix_acl *acl = NULL;
         struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+       struct posix_acl *acl = NULL;
         int error = 0;
  
         if (!S_ISLNK(inode->i_mode)) {
@@ -276,19 +277,19 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir)
                         inode->i_mode &= ~current_umask();
         }
  
-       if (test_opt(sbi, POSIX_ACL) && acl) {
+       if (!test_opt(sbi, POSIX_ACL) || !acl)
+               goto cleanup;
  
-               if (S_ISDIR(inode->i_mode)) {
-                       error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
-                       if (error)
-                               goto cleanup;
-               }
-               error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
-               if (error < 0)
-                       return error;
-               if (error > 0)
-                       error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+       if (S_ISDIR(inode->i_mode)) {
+               error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage);
+               if (error)
+                       goto cleanup;
         }
+       error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
+       if (error < 0)
+               return error;
+       if (error > 0)
+               error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage);
  cleanup:
         posix_acl_release(acl);
         return error;
@@ -313,7 +314,8 @@ int f2fs_acl_chmod(struct inode *inode)
         error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
         if (error)
                 return error;
-       error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+
+       error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL);
         posix_acl_release(acl);
         return error;
  }
@@ -388,7 +390,7 @@ static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
                 acl = NULL;
         }
  
-       error = f2fs_set_acl(inode, type, acl);
+       error = f2fs_set_acl(inode, type, acl, NULL);
  
  release_and_out:
         posix_acl_release(acl);
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h

index 80f4306..4963313 100644 (file)
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -36,9 +36,9 @@ struct f2fs_acl_header {
  
  #ifdef CONFIG_F2FS_FS_POSIX_ACL
  
-extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type);
-extern int f2fs_acl_chmod(struct inode *inode);
-extern int f2fs_init_acl(struct inode *inode, struct inode *dir);
+extern struct posix_acl *f2fs_get_acl(struct inode *, int);
+extern int f2fs_acl_chmod(struct inode *);
+extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
  #else
  #define f2fs_check_acl NULL
  #define f2fs_get_acl   NULL
@@ -49,7 +49,8 @@ static inline int f2fs_acl_chmod(struct inode *inode)
         return 0;
  }
  
-static inline int f2fs_init_acl(struct inode *inode, struct inode *dir)
+static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
+                                                       struct page *page)
  {
         return 0;
  }
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c

index bb31220..5716e5e 100644 (file)
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -81,7 +81,7 @@ static int f2fs_write_meta_page(struct page *page,
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
  
         /* Should not write any meta pages, if any IO error was occurred */
-       if (wbc->for_reclaim ||
+       if (wbc->for_reclaim || sbi->por_doing ||
                         is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
                 dec_page_count(sbi, F2FS_DIRTY_META);
                 wbc->pages_skipped++;
@@ -142,8 +142,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                 for (i = 0; i < nr_pages; i++) {
                         struct page *page = pvec.pages[i];
                         lock_page(page);
-                       BUG_ON(page->mapping != mapping);
-                       BUG_ON(!PageDirty(page));
+                       f2fs_bug_on(page->mapping != mapping);
+                       f2fs_bug_on(!PageDirty(page));
                         clear_page_dirty_for_io(page);
                         if (f2fs_write_meta_page(page, &wbc)) {
                                 unlock_page(page);
@@ -167,6 +167,8 @@ static int f2fs_set_meta_page_dirty(struct page *page)
         struct address_space *mapping = page->mapping;
         struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
  
+       trace_f2fs_set_page_dirty(page, META);
+
         SetPageUptodate(page);
         if (!PageDirty(page)) {
                 __set_page_dirty_nobuffers(page);
@@ -206,6 +208,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
  void release_orphan_inode(struct f2fs_sb_info *sbi)
  {
         mutex_lock(&sbi->orphan_inode_mutex);
+       f2fs_bug_on(sbi->n_orphans == 0);
         sbi->n_orphans--;
         mutex_unlock(&sbi->orphan_inode_mutex);
  }
@@ -225,12 +228,8 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
                         break;
                 orphan = NULL;
         }
-retry:
-       new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
-       if (!new) {
-               cond_resched();
-               goto retry;
-       }
+
+       new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
         new->ino = ino;
  
         /* add new_oentry into list which is sorted by inode number */
@@ -253,6 +252,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
                 if (orphan->ino == ino) {
                         list_del(&orphan->list);
                         kmem_cache_free(orphan_entry_slab, orphan);
+                       f2fs_bug_on(sbi->n_orphans == 0);
                         sbi->n_orphans--;
                         break;
                 }
@@ -263,7 +263,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
  static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
  {
         struct inode *inode = f2fs_iget(sbi->sb, ino);
-       BUG_ON(IS_ERR(inode));
+       f2fs_bug_on(IS_ERR(inode));
         clear_nlink(inode);
  
         /* truncate all the data during iput */
@@ -277,7 +277,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
         if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
                 return 0;
  
-       sbi->por_doing = 1;
+       sbi->por_doing = true;
         start_blk = __start_cp_addr(sbi) + 1;
         orphan_blkaddr = __start_sum_addr(sbi) - 1;
  
@@ -294,7 +294,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
         }
         /* clear Orphan Flag */
         clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
-       sbi->por_doing = 0;
+       sbi->por_doing = false;
         return 0;
  }
  
@@ -469,9 +469,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
                         return -EEXIST;
         }
         list_add_tail(&new->list, head);
-#ifdef CONFIG_F2FS_STAT_FS
-       sbi->n_dirty_dirs++;
-#endif
+       stat_inc_dirty_dir(sbi);
         return 0;
  }
  
@@ -482,12 +480,8 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
  
         if (!S_ISDIR(inode->i_mode))
                 return;
-retry:
-       new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
-       if (!new) {
-               cond_resched();
-               goto retry;
-       }
+
+       new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
         new->inode = inode;
         INIT_LIST_HEAD(&new->list);
  
@@ -504,13 +498,9 @@ retry:
  void add_dirty_dir_inode(struct inode *inode)
  {
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-       struct dir_inode_entry *new;
-retry:
-       new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
-       if (!new) {
-               cond_resched();
-               goto retry;
-       }
+       struct dir_inode_entry *new =
+                       f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+
         new->inode = inode;
         INIT_LIST_HEAD(&new->list);
  
@@ -541,9 +531,7 @@ void remove_dirty_dir_inode(struct inode *inode)
                 if (entry->inode == inode) {
                         list_del(&entry->list);
                         kmem_cache_free(inode_entry_slab, entry);
-#ifdef CONFIG_F2FS_STAT_FS
-                       sbi->n_dirty_dirs--;
-#endif
+                       stat_dec_dirty_dir(sbi);
                         break;
                 }
         }
@@ -617,11 +605,10 @@ static void block_operations(struct f2fs_sb_info *sbi)
         blk_start_plug(&plug);
  
  retry_flush_dents:
-       mutex_lock_all(sbi);
-
+       f2fs_lock_all(sbi);
         /* write all the dirty dentry pages */
         if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
-               mutex_unlock_all(sbi);
+               f2fs_unlock_all(sbi);
                 sync_dirty_dir_inodes(sbi);
                 goto retry_flush_dents;
         }
@@ -644,7 +631,22 @@ retry_flush_nodes:
  static void unblock_operations(struct f2fs_sb_info *sbi)
  {
         mutex_unlock(&sbi->node_write);
-       mutex_unlock_all(sbi);
+       f2fs_unlock_all(sbi);
+}
+
+static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+{
+       DEFINE_WAIT(wait);      /* this task's entry on sbi->cp_wait */
+
+       for (;;) {      /* sleep until get_pages(F2FS_WRITEBACK) reads zero */
+               prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
+               /* queued and marked sleeping above, so test the count now */
+               if (!get_pages(sbi, F2FS_WRITEBACK))
+                       break;
+               /* NOTE(review): the cp_wait waker is outside this hunk */
+               io_schedule();
+       }
+       finish_wait(&sbi->cp_wait, &wait);
  }
  
  static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
@@ -756,8 +758,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
         f2fs_put_page(cp_page, 1);
  
         /* wait for previous submitted node/meta pages writeback */
-       while (get_pages(sbi, F2FS_WRITEBACK))
-               congestion_wait(BLK_RW_ASYNC, HZ / 50);
+       wait_on_all_pages_writeback(sbi);
  
         filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
         filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c

index 941f9b9..aa3438c 100644 (file)
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -68,9 +68,6 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
                                         struct buffer_head *bh_result)
  {
         struct f2fs_inode_info *fi = F2FS_I(inode);
-#ifdef CONFIG_F2FS_STAT_FS
-       struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-#endif
         pgoff_t start_fofs, end_fofs;
         block_t start_blkaddr;
  
@@ -80,9 +77,8 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
                 return 0;
         }
  
-#ifdef CONFIG_F2FS_STAT_FS
-       sbi->total_hit_ext++;
-#endif
+       stat_inc_total_hit(inode->i_sb);
+
         start_fofs = fi->ext.fofs;
         end_fofs = fi->ext.fofs + fi->ext.len - 1;
         start_blkaddr = fi->ext.blk_addr;
@@ -100,9 +96,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
                 else
                         bh_result->b_size = UINT_MAX;
  
-#ifdef CONFIG_F2FS_STAT_FS
-               sbi->read_hit_ext++;
-#endif
+               stat_inc_read_hit(inode->i_sb);
                 read_unlock(&fi->ext.ext_lock);
                 return 1;
         }
@@ -116,7 +110,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
         pgoff_t fofs, start_fofs, end_fofs;
         block_t start_blkaddr, end_blkaddr;
  
-       BUG_ON(blk_addr == NEW_ADDR);
+       f2fs_bug_on(blk_addr == NEW_ADDR);
         fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
                                                         dn->ofs_in_node;
  
@@ -442,7 +436,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock,
         }
  
         /* It does not support data allocation */
-       BUG_ON(create);
+       f2fs_bug_on(create);
  
         if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
                 int i;
@@ -560,9 +554,9 @@ write:
                 inode_dec_dirty_dents(inode);
                 err = do_write_data_page(page);
         } else {
-               int ilock = mutex_lock_op(sbi);
+               f2fs_lock_op(sbi);
                 err = do_write_data_page(page);
-               mutex_unlock_op(sbi, ilock);
+               f2fs_unlock_op(sbi);
                 need_balance_fs = true;
         }
         if (err == -ENOENT)
@@ -641,7 +635,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
         pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
         struct dnode_of_data dn;
         int err = 0;
-       int ilock;
  
         f2fs_balance_fs(sbi);
  repeat:
@@ -650,7 +643,7 @@ repeat:
                 return -ENOMEM;
         *pagep = page;
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
  
         set_new_dnode(&dn, inode, NULL, NULL, 0);
         err = get_dnode_of_data(&dn, index, ALLOC_NODE);
@@ -664,7 +657,7 @@ repeat:
         if (err)
                 goto err;
  
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
                 return 0;
@@ -700,7 +693,7 @@ out:
         return 0;
  
  err:
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
         f2fs_put_page(page, 1);
         return err;
  }
@@ -763,6 +756,8 @@ static int f2fs_set_data_page_dirty(struct page *page)
         struct address_space *mapping = page->mapping;
         struct inode *inode = mapping->host;
  
+       trace_f2fs_set_page_dirty(page, DATA);
+
         SetPageUptodate(page);
         if (!PageDirty(page)) {
                 __set_page_dirty_nobuffers(page);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c

index 384c6da..594fc1b 100644 (file)
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -139,7 +139,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
         bool room = false;
         int max_slots = 0;
  
-       BUG_ON(level > MAX_DIR_HASH_DEPTH);
+       f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
  
         nbucket = dir_buckets(level);
         nblock = bucket_blocks(level);
@@ -346,7 +346,7 @@ static struct page *init_inode_metadata(struct inode *inode,
                                 goto error;
                 }
  
-               err = f2fs_init_acl(inode, dir);
+               err = f2fs_init_acl(inode, dir, page);
                 if (err)
                         goto error;
  
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h

index 608f0df..89dc750 100644 (file)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -18,6 +18,13 @@
  #include <linux/crc32.h>
  #include <linux/magic.h>
  #include <linux/kobject.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_F2FS_CHECK_FS
+#define f2fs_bug_on(condition) BUG_ON(condition)
+#else  /* checks off: condition is not evaluated, but keep one statement */
+#define f2fs_bug_on(condition) do { } while (0)
+#endif
  
  /*
   * For mount options
@@ -298,6 +305,9 @@ struct f2fs_sm_info {
         unsigned int main_segments;     /* # of segments in main area */
         unsigned int reserved_segments; /* # of reserved segments */
         unsigned int ovp_segments;      /* # of overprovision segments */
+
+       /* a threshold to reclaim prefree segments */
+       unsigned int rec_prefree_segments;
  };
  
  /*
@@ -317,14 +327,6 @@ enum count_type {
         NR_COUNT_TYPE,
  };
  
-/*
- * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS].
- * The checkpoint procedure blocks all the locks in this fs_lock array.
- * Some FS operations grab free locks, and if there is no free lock,
- * then wait to grab a lock in a round-robin manner.
- */
-#define NR_GLOBAL_LOCKS        8
-
  /*
   * The below are the page types of bios used in submti_bio().
   * The available types are:
@@ -365,12 +367,12 @@ struct f2fs_sb_info {
         struct f2fs_checkpoint *ckpt;           /* raw checkpoint pointer */
         struct inode *meta_inode;               /* cache meta blocks */
         struct mutex cp_mutex;                  /* checkpoint procedure lock */
-       struct mutex fs_lock[NR_GLOBAL_LOCKS];  /* blocking FS operations */
+       struct rw_semaphore cp_rwsem;           /* blocking FS operations */
         struct mutex node_write;                /* locking node writes */
         struct mutex writepages;                /* mutex for writepages() */
-       unsigned char next_lock_num;            /* round-robin global locks */
-       int por_doing;                          /* recovery is doing or not */
-       int on_build_free_nids;                 /* build_free_nids is doing */
+       bool por_doing;                         /* recovery is doing or not */
+       bool on_build_free_nids;                /* build_free_nids is doing */
+       wait_queue_head_t cp_wait;
  
         /* for orphan inode management */
         struct list_head orphan_inode_list;     /* orphan inode list */
@@ -520,48 +522,24 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
         cp->ckpt_flags = cpu_to_le32(ckpt_flags);
  }
  
-static inline void mutex_lock_all(struct f2fs_sb_info *sbi)
+static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
  {
-       int i;
-
-       for (i = 0; i < NR_GLOBAL_LOCKS; i++) {
-               /*
-                * This is the only time we take multiple fs_lock[]
-                * instances; the order is immaterial since we
-                * always hold cp_mutex, which serializes multiple
-                * such operations.
-                */
-               mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex);
-       }
+       down_read(&sbi->cp_rwsem);
  }
  
-static inline void mutex_unlock_all(struct f2fs_sb_info *sbi)
+static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
  {
-       int i = 0;
-       for (; i < NR_GLOBAL_LOCKS; i++)
-               mutex_unlock(&sbi->fs_lock[i]);
+       up_read(&sbi->cp_rwsem);
  }
  
-static inline int mutex_lock_op(struct f2fs_sb_info *sbi)
+static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
  {
-       unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS;
-       int i = 0;
-
-       for (; i < NR_GLOBAL_LOCKS; i++)
-               if (mutex_trylock(&sbi->fs_lock[i]))
-                       return i;
-
-       mutex_lock(&sbi->fs_lock[next_lock]);
-       sbi->next_lock_num++;
-       return next_lock;
+       down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex);
  }
  
-static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock)
+static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
  {
-       if (ilock < 0)
-               return;
-       BUG_ON(ilock >= NR_GLOBAL_LOCKS);
-       mutex_unlock(&sbi->fs_lock[ilock]);
+       up_write(&sbi->cp_rwsem);
  }
  
  /*
@@ -612,8 +590,8 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
                                                 blkcnt_t count)
  {
         spin_lock(&sbi->stat_lock);
-       BUG_ON(sbi->total_valid_block_count < (block_t) count);
-       BUG_ON(inode->i_blocks < count);
+       f2fs_bug_on(sbi->total_valid_block_count < (block_t) count);
+       f2fs_bug_on(inode->i_blocks < count);
         inode->i_blocks -= count;
         sbi->total_valid_block_count -= (block_t)count;
         spin_unlock(&sbi->stat_lock);
@@ -745,9 +723,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
  {
         spin_lock(&sbi->stat_lock);
  
-       BUG_ON(sbi->total_valid_block_count < count);
-       BUG_ON(sbi->total_valid_node_count < count);
-       BUG_ON(inode->i_blocks < count);
+       f2fs_bug_on(sbi->total_valid_block_count < count);
+       f2fs_bug_on(sbi->total_valid_node_count < count);
+       f2fs_bug_on(inode->i_blocks < count);
  
         inode->i_blocks -= count;
         sbi->total_valid_node_count -= count;
@@ -768,7 +746,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
  static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
  {
         spin_lock(&sbi->stat_lock);
-       BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count);
+       f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count);
         sbi->total_valid_inode_count++;
         spin_unlock(&sbi->stat_lock);
  }
@@ -776,7 +754,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
  static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
  {
         spin_lock(&sbi->stat_lock);
-       BUG_ON(!sbi->total_valid_inode_count);
+       f2fs_bug_on(!sbi->total_valid_inode_count);
         sbi->total_valid_inode_count--;
         spin_unlock(&sbi->stat_lock);
         return 0;
@@ -797,7 +775,7 @@ static inline void f2fs_put_page(struct page *page, int unlock)
                 return;
  
         if (unlock) {
-               BUG_ON(!PageLocked(page));
+               f2fs_bug_on(!PageLocked(page));
                 unlock_page(page);
         }
         page_cache_release(page);
@@ -819,6 +797,20 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
         return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
  }
  
+/* Allocate from @cachep with @flags; reschedule and retry until it succeeds. */
+static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+                                               gfp_t flags)
+{
+       void *obj = kmem_cache_alloc(cachep, flags);
+
+       /* a failed allocation is never reported: yield the CPU, try again */
+       while (!obj) {
+               cond_resched();
+               obj = kmem_cache_alloc(cachep, flags);
+       }
+       return obj;
+}
+
  #define RAW_IS_INODE(p)        ((p)->footer.nid == (p)->footer.ino)
  
  static inline bool IS_INODE(struct page *page)
@@ -979,6 +971,7 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
   */
  void f2fs_set_inode_flags(struct inode *);
  struct inode *f2fs_iget(struct super_block *, unsigned long);
+int try_to_free_nats(struct f2fs_sb_info *, int);
  void update_inode(struct inode *, struct page *);
  int update_inode_page(struct inode *);
  int f2fs_write_inode(struct inode *, struct writeback_control *);
@@ -1033,6 +1026,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
  int truncate_inode_blocks(struct inode *, pgoff_t);
  int truncate_xattr_node(struct inode *, struct page *);
+int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
  int remove_inode_page(struct inode *);
  struct page *new_inode_page(struct inode *, const struct qstr *);
  struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
@@ -1059,6 +1053,7 @@ void destroy_node_manager_caches(void);
   * segment.c
   */
  void f2fs_balance_fs(struct f2fs_sb_info *);
+void f2fs_balance_fs_bg(struct f2fs_sb_info *);
  void invalidate_blocks(struct f2fs_sb_info *, block_t);
  void clear_prefree_segments(struct f2fs_sb_info *);
  int npages_for_summary_flush(struct f2fs_sb_info *);
@@ -1172,7 +1167,16 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
         return (struct f2fs_stat_info*)sbi->stat_info;
  }
  
-#define stat_inc_call_count(si)        ((si)->call_count++)
+#define stat_inc_call_count(si)                ((si)->call_count++)
+#define stat_inc_bggc_count(sbi)       ((sbi)->bg_gc++)
+#define stat_inc_dirty_dir(sbi)                ((sbi)->n_dirty_dirs++)
+#define stat_dec_dirty_dir(sbi)                ((sbi)->n_dirty_dirs--)
+#define stat_inc_total_hit(sb)         ((F2FS_SB(sb))->total_hit_ext++)
+#define stat_inc_read_hit(sb)          ((F2FS_SB(sb))->read_hit_ext++)
+#define stat_inc_seg_type(sbi, curseg)                                 \
+               ((sbi)->segment_count[(curseg)->alloc_type]++)
+#define stat_inc_block_count(sbi, curseg)                              \
+               ((sbi)->block_count[(curseg)->alloc_type]++)
  
  #define stat_inc_seg_count(sbi, type)                                  \
         do {                                                            \
@@ -1207,6 +1211,13 @@ void __init f2fs_create_root_stats(void);
  void f2fs_destroy_root_stats(void);
  #else
  #define stat_inc_call_count(si)
+#define stat_inc_bggc_count(si)
+#define stat_inc_dirty_dir(sbi)
+#define stat_dec_dirty_dir(sbi)
+#define stat_inc_total_hit(sb)
+#define stat_inc_read_hit(sb)
+#define stat_inc_seg_type(sbi, curseg)
+#define stat_inc_block_count(sbi, curseg)
  #define stat_inc_seg_count(si, type)
  #define stat_inc_tot_blk_count(si, blks)
  #define stat_inc_data_blk_count(si, blks)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c

index 02c9069..7d714f4 100644 (file)
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,18 +35,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
         block_t old_blk_addr;
         struct dnode_of_data dn;
-       int err, ilock;
+       int err;
  
         f2fs_balance_fs(sbi);
  
         sb_start_pagefault(inode->i_sb);
  
         /* block allocation */
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         set_new_dnode(&dn, inode, NULL, NULL, 0);
         err = get_dnode_of_data(&dn, page->index, ALLOC_NODE);
         if (err) {
-               mutex_unlock_op(sbi, ilock);
+               f2fs_unlock_op(sbi);
                 goto out;
         }
  
@@ -56,12 +56,12 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
                 err = reserve_new_block(&dn);
                 if (err) {
                         f2fs_put_dnode(&dn);
-                       mutex_unlock_op(sbi, ilock);
+                       f2fs_unlock_op(sbi);
                         goto out;
                 }
         }
         f2fs_put_dnode(&dn);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         file_update_time(vma->vm_file);
         lock_page(page);
@@ -88,6 +88,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
         set_page_dirty(page);
         SetPageUptodate(page);
  
+       trace_f2fs_vm_page_mkwrite(page, DATA);
  mapped:
         /* fill the page */
         wait_on_page_writeback(page);
@@ -188,8 +189,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                         if (ret)
                                 goto out;
                 }
-               filemap_fdatawait_range(sbi->node_inode->i_mapping,
-                                                       0, LONG_MAX);
+               ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
+               if (ret)
+                       goto out;
                 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
         }
  out:
@@ -270,7 +272,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
         unsigned int blocksize = inode->i_sb->s_blocksize;
         struct dnode_of_data dn;
         pgoff_t free_from;
-       int count = 0, ilock = -1;
+       int count = 0;
         int err;
  
         trace_f2fs_truncate_blocks_enter(inode, from);
@@ -278,13 +280,13 @@ static int truncate_blocks(struct inode *inode, u64 from)
         free_from = (pgoff_t)
                         ((from + blocksize - 1) >> (sbi->log_blocksize));
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         set_new_dnode(&dn, inode, NULL, NULL, 0);
         err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
         if (err) {
                 if (err == -ENOENT)
                         goto free_next;
-               mutex_unlock_op(sbi, ilock);
+               f2fs_unlock_op(sbi);
                 trace_f2fs_truncate_blocks_exit(inode, err);
                 return err;
         }
@@ -295,7 +297,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
                 count = ADDRS_PER_BLOCK;
  
         count -= dn.ofs_in_node;
-       BUG_ON(count < 0);
+       f2fs_bug_on(count < 0);
  
         if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
                 truncate_data_blocks_range(&dn, count);
@@ -305,7 +307,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
         f2fs_put_dnode(&dn);
  free_next:
         err = truncate_inode_blocks(inode, free_from);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         /* lastly zero out the first data page */
         truncate_partial_data_page(inode, from);
@@ -416,16 +418,15 @@ static void fill_zero(struct inode *inode, pgoff_t index,
  {
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
         struct page *page;
-       int ilock;
  
         if (!len)
                 return;
  
         f2fs_balance_fs(sbi);
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         page = get_new_data_page(inode, NULL, index, false);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         if (!IS_ERR(page)) {
                 wait_on_page_writeback(page);
@@ -484,7 +485,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
                         struct address_space *mapping = inode->i_mapping;
                         loff_t blk_start, blk_end;
                         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-                       int ilock;
  
                         f2fs_balance_fs(sbi);
  
@@ -493,9 +493,9 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
                         truncate_inode_pages_range(mapping, blk_start,
                                         blk_end - 1);
  
-                       ilock = mutex_lock_op(sbi);
+                       f2fs_lock_op(sbi);
                         ret = truncate_hole(inode, pg_start, pg_end);
-                       mutex_unlock_op(sbi, ilock);
+                       f2fs_unlock_op(sbi);
                 }
         }
  
@@ -529,13 +529,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
  
         for (index = pg_start; index <= pg_end; index++) {
                 struct dnode_of_data dn;
-               int ilock;
  
-               ilock = mutex_lock_op(sbi);
+               f2fs_lock_op(sbi);
                 set_new_dnode(&dn, inode, NULL, NULL, 0);
                 ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
                 if (ret) {
-                       mutex_unlock_op(sbi, ilock);
+                       f2fs_unlock_op(sbi);
                         break;
                 }
  
@@ -543,12 +542,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
                         ret = reserve_new_block(&dn);
                         if (ret) {
                                 f2fs_put_dnode(&dn);
-                               mutex_unlock_op(sbi, ilock);
+                               f2fs_unlock_op(sbi);
                                 break;
                         }
                 }
                 f2fs_put_dnode(&dn);
-               mutex_unlock_op(sbi, ilock);
+               f2fs_unlock_op(sbi);
  
                 if (pg_start == pg_end)
                         new_size = offset + len;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c

index 2f157e8..b7ad1ec 100644 (file)
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -77,13 +77,15 @@ static int gc_thread_func(void *data)
                 else
                         wait_ms = increase_sleep_time(gc_th, wait_ms);
  
-#ifdef CONFIG_F2FS_STAT_FS
-               sbi->bg_gc++;
-#endif
+               stat_inc_bggc_count(sbi);
  
                 /* if return value is not zero, no victim was selected */
                 if (f2fs_gc(sbi))
                         wait_ms = gc_th->no_gc_sleep_time;
+
+               /* balancing f2fs's metadata periodically */
+               f2fs_balance_fs_bg(sbi);
+
         } while (!kthread_should_stop());
         return 0;
  }
@@ -236,8 +238,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
         return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
  }
  
-static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
-                                       struct victim_sel_policy *p)
+static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
+                       unsigned int segno, struct victim_sel_policy *p)
  {
         if (p->alloc_mode == SSR)
                 return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
@@ -293,7 +295,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                         }
                         break;
                 }
-               p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
+
+               p.offset = segno + p.ofs_unit;
+               if (p.ofs_unit > 1)
+                       p.offset -= segno % p.ofs_unit;
+
                 secno = GET_SECNO(sbi, segno);
  
                 if (sec_usage_check(sbi, secno))
@@ -306,10 +312,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                 if (p.min_cost > cost) {
                         p.min_segno = segno;
                         p.min_cost = cost;
-               }
-
-               if (cost == max_cost)
+               } else if (unlikely(cost == max_cost)) {
                         continue;
+               }
  
                 if (nsearched++ >= p.max_search) {
                         sbi->last_victim[p.gc_mode] = segno;
@@ -358,12 +363,8 @@ static void add_gc_inode(struct inode *inode, struct list_head *ilist)
                 iput(inode);
                 return;
         }
-repeat:
-       new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
-       if (!new_ie) {
-               cond_resched();
-               goto repeat;
-       }
+
+       new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
         new_ie->inode = inode;
         list_add_tail(&new_ie->list, ilist);
  }
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c

index 9339cd2..d0eaa9f 100644 (file)
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -37,6 +37,31 @@ void f2fs_set_inode_flags(struct inode *inode)
                 inode->i_flags |= S_DIRSYNC;
  }
  
+static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+       if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+                       S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               if (ri->i_addr[0])
+                       inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
+               else
+                       inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
+       }
+}
+
+static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+       if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+               if (old_valid_dev(inode->i_rdev)) {
+                       ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev));
+                       ri->i_addr[1] = 0;
+               } else {
+                       ri->i_addr[0] = 0;
+                       ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev));
+                       ri->i_addr[2] = 0;
+               }
+       }
+}
+
  static int do_read_inode(struct inode *inode)
  {
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -73,10 +98,6 @@ static int do_read_inode(struct inode *inode)
         inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
         inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
         inode->i_generation = le32_to_cpu(ri->i_generation);
-       if (ri->i_addr[0])
-               inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
-       else
-               inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
  
         fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
         fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
@@ -84,8 +105,13 @@ static int do_read_inode(struct inode *inode)
         fi->flags = 0;
         fi->i_advise = ri->i_advise;
         fi->i_pino = le32_to_cpu(ri->i_pino);
+
         get_extent_info(&fi->ext, ri->i_ext);
         get_inline_info(fi, ri);
+
+       /* get rdev by using inline_info */
+       __get_inode_rdev(inode, ri);
+
         f2fs_put_page(node_page, 1);
         return 0;
  }
@@ -179,21 +205,10 @@ void update_inode(struct inode *inode, struct page *node_page)
         ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
         ri->i_generation = cpu_to_le32(inode->i_generation);
  
-       if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
-               if (old_valid_dev(inode->i_rdev)) {
-                       ri->i_addr[0] =
-                               cpu_to_le32(old_encode_dev(inode->i_rdev));
-                       ri->i_addr[1] = 0;
-               } else {
-                       ri->i_addr[0] = 0;
-                       ri->i_addr[1] =
-                               cpu_to_le32(new_encode_dev(inode->i_rdev));
-                       ri->i_addr[2] = 0;
-               }
-       }
-
+       __set_inode_rdev(inode, ri);
         set_cold_node(inode, node_page);
         set_page_dirty(node_page);
+
         clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
  }
  
@@ -214,7 +229,7 @@ int update_inode_page(struct inode *inode)
  int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
  {
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-       int ret, ilock;
+       int ret;
  
         if (inode->i_ino == F2FS_NODE_INO(sbi) ||
                         inode->i_ino == F2FS_META_INO(sbi))
@@ -227,9 +242,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
          * We need to lock here to prevent from producing dirty node pages
          * during the urgent cleaning time when runing out of free sections.
          */
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         ret = update_inode_page(inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         if (wbc)
                 f2fs_balance_fs(sbi);
@@ -243,7 +258,6 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
  void f2fs_evict_inode(struct inode *inode)
  {
         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-       int ilock;
  
         trace_f2fs_evict_inode(inode);
         truncate_inode_pages(&inode->i_data, 0);
@@ -252,7 +266,7 @@ void f2fs_evict_inode(struct inode *inode)
                         inode->i_ino == F2FS_META_INO(sbi))
                 goto no_delete;
  
-       BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
+       f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents));
         remove_dirty_dir_inode(inode);
  
         if (inode->i_nlink || is_bad_inode(inode))
@@ -265,9 +279,9 @@ void f2fs_evict_inode(struct inode *inode)
         if (F2FS_HAS_BLOCKS(inode))
                 f2fs_truncate(inode);
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         remove_inode_page(inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         sb_end_intwrite(inode->i_sb);
  no_delete:
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c

index 2a5359c..575adac 100644 (file)
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -27,19 +27,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
         nid_t ino;
         struct inode *inode;
         bool nid_free = false;
-       int err, ilock;
+       int err;
  
         inode = new_inode(sb);
         if (!inode)
                 return ERR_PTR(-ENOMEM);
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         if (!alloc_nid(sbi, &ino)) {
-               mutex_unlock_op(sbi, ilock);
+               f2fs_unlock_op(sbi);
                 err = -ENOSPC;
                 goto fail;
         }
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         inode->i_uid = current_fsuid();
  
@@ -115,7 +115,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
         struct f2fs_sb_info *sbi = F2FS_SB(sb);
         struct inode *inode;
         nid_t ino = 0;
-       int err, ilock;
+       int err;
  
         f2fs_balance_fs(sbi);
  
@@ -131,9 +131,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
         inode->i_mapping->a_ops = &f2fs_dblock_aops;
         ino = inode->i_ino;
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
         if (err)
                 goto out;
  
@@ -157,7 +157,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
         struct inode *inode = old_dentry->d_inode;
         struct super_block *sb = dir->i_sb;
         struct f2fs_sb_info *sbi = F2FS_SB(sb);
-       int err, ilock;
+       int err;
  
         f2fs_balance_fs(sbi);
  
@@ -165,9 +165,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
         ihold(inode);
  
         set_inode_flag(F2FS_I(inode), FI_INC_LINK);
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
         if (err)
                 goto out;
  
@@ -220,7 +220,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
         struct f2fs_dir_entry *de;
         struct page *page;
         int err = -ENOENT;
-       int ilock;
  
         trace_f2fs_unlink_enter(dir, dentry);
         f2fs_balance_fs(sbi);
@@ -229,16 +228,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
         if (!de)
                 goto fail;
  
+       f2fs_lock_op(sbi);
         err = acquire_orphan_inode(sbi);
         if (err) {
+               f2fs_unlock_op(sbi);
                 kunmap(page);
                 f2fs_put_page(page, 0);
                 goto fail;
         }
-
-       ilock = mutex_lock_op(sbi);
         f2fs_delete_entry(de, page, inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         /* In order to evict this inode,  we set it dirty */
         mark_inode_dirty(inode);
@@ -254,7 +253,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
         struct f2fs_sb_info *sbi = F2FS_SB(sb);
         struct inode *inode;
         size_t symlen = strlen(symname) + 1;
-       int err, ilock;
+       int err;
  
         f2fs_balance_fs(sbi);
  
@@ -265,9 +264,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
         inode->i_op = &f2fs_symlink_inode_operations;
         inode->i_mapping->a_ops = &f2fs_dblock_aops;
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
         if (err)
                 goto out;
  
@@ -290,7 +289,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
         struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
         struct inode *inode;
-       int err, ilock;
+       int err;
  
         f2fs_balance_fs(sbi);
  
@@ -304,9 +303,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
  
         set_inode_flag(F2FS_I(inode), FI_INC_LINK);
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
         if (err)
                 goto out_fail;
  
@@ -342,7 +341,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
         struct f2fs_sb_info *sbi = F2FS_SB(sb);
         struct inode *inode;
         int err = 0;
-       int ilock;
  
         if (!new_valid_dev(rdev))
                 return -EINVAL;
@@ -356,9 +354,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
         init_special_inode(inode, inode->i_mode, rdev);
         inode->i_op = &f2fs_special_inode_operations;
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         err = f2fs_add_link(dentry, inode);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
         if (err)
                 goto out;
  
@@ -387,7 +385,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct f2fs_dir_entry *old_dir_entry = NULL;
         struct f2fs_dir_entry *old_entry;
         struct f2fs_dir_entry *new_entry;
-       int err = -ENOENT, ilock = -1;
+       int err = -ENOENT;
  
         f2fs_balance_fs(sbi);
  
@@ -402,7 +400,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         goto out_old;
         }
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
  
         if (new_inode) {
  
@@ -467,7 +465,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 update_inode_page(old_dir);
         }
  
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
         return 0;
  
  put_out_dir:
@@ -477,7 +475,7 @@ out_dir:
                 kunmap(old_dir_page);
                 f2fs_put_page(old_dir_page, 0);
         }
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  out_old:
         kunmap(old_page);
         f2fs_put_page(old_page, 0);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c

index 51ef278..4ac4150 100644 (file)
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -204,7 +204,7 @@ retry:
                 }
                 e->ni = *ni;
                 e->checkpointed = true;
-               BUG_ON(ni->blk_addr == NEW_ADDR);
+               f2fs_bug_on(ni->blk_addr == NEW_ADDR);
         } else if (new_blkaddr == NEW_ADDR) {
                 /*
                  * when nid is reallocated,
@@ -212,19 +212,19 @@ retry:
                  * So, reinitialize it with new information.
                  */
                 e->ni = *ni;
-               BUG_ON(ni->blk_addr != NULL_ADDR);
+               f2fs_bug_on(ni->blk_addr != NULL_ADDR);
         }
  
         if (new_blkaddr == NEW_ADDR)
                 e->checkpointed = false;
  
         /* sanity check */
-       BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
-       BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
+       f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
+       f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
                         new_blkaddr == NULL_ADDR);
-       BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
+       f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&
                         new_blkaddr == NEW_ADDR);
-       BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
+       f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&
                         nat_get_blkaddr(e) != NULL_ADDR &&
                         new_blkaddr == NEW_ADDR);
  
@@ -240,7 +240,7 @@ retry:
         write_unlock(&nm_i->nat_tree_lock);
  }
  
-static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
  {
         struct f2fs_nm_info *nm_i = NM_I(sbi);
  
@@ -495,10 +495,10 @@ static void truncate_node(struct dnode_of_data *dn)
  
         get_node_info(sbi, dn->nid, &ni);
         if (dn->inode->i_blocks == 0) {
-               BUG_ON(ni.blk_addr != NULL_ADDR);
+               f2fs_bug_on(ni.blk_addr != NULL_ADDR);
                 goto invalidate;
         }
-       BUG_ON(ni.blk_addr == NULL_ADDR);
+       f2fs_bug_on(ni.blk_addr == NULL_ADDR);
  
         /* Deallocate node address */
         invalidate_blocks(sbi, ni.blk_addr);
@@ -822,7 +822,7 @@ int remove_inode_page(struct inode *inode)
         }
  
         /* 0 is possible, after f2fs_new_inode() is failed */
-       BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
+       f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
         set_new_dnode(&dn, inode, page, page, ino);
         truncate_node(&dn);
         return 0;
@@ -863,7 +863,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
         get_node_info(sbi, dn->nid, &old_ni);
  
         /* Reinitialize old_ni with new node page */
-       BUG_ON(old_ni.blk_addr != NULL_ADDR);
+       f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
         new_ni = old_ni;
         new_ni.ino = dn->inode->i_ino;
         set_node_addr(sbi, &new_ni, NEW_ADDR);
@@ -969,7 +969,7 @@ repeat:
                 goto repeat;
         }
  got_it:
-       BUG_ON(nid != nid_of_node(page));
+       f2fs_bug_on(nid != nid_of_node(page));
         mark_page_accessed(page);
         return page;
  }
@@ -1148,6 +1148,47 @@ continue_unlock:
         return nwritten;
  }
  
+int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+{
+       struct address_space *mapping = sbi->node_inode->i_mapping;
+       pgoff_t index = 0, end = LONG_MAX;
+       struct pagevec pvec;
+       int nr_pages;
+       int ret2 = 0, ret = 0;
+
+       pagevec_init(&pvec, 0);
+       while ((index <= end) &&
+                       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                       PAGECACHE_TAG_WRITEBACK,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
+               unsigned i;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /* until radix tree lookup accepts end_index */
+                       if (page->index > end)
+                               continue;
+
+                       if (ino && ino_of_node(page) == ino) {
+                               wait_on_page_writeback(page);
+                               if (TestClearPageError(page))
+                                       ret = -EIO;
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+
+       if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+               ret2 = -ENOSPC;
+       if (test_and_clear_bit(AS_EIO, &mapping->flags))
+               ret2 = -EIO;
+       if (!ret)
+               ret = ret2;
+       return ret;
+}
+
  static int f2fs_write_node_page(struct page *page,
                                 struct writeback_control *wbc)
  {
@@ -1156,11 +1197,14 @@ static int f2fs_write_node_page(struct page *page,
         block_t new_addr;
         struct node_info ni;
  
+       if (sbi->por_doing)
+               goto redirty_out;
+
         wait_on_page_writeback(page);
  
         /* get old block addr of this node page */
         nid = nid_of_node(page);
-       BUG_ON(page->index != nid);
+       f2fs_bug_on(page->index != nid);
  
         get_node_info(sbi, nid, &ni);
  
@@ -1171,12 +1215,8 @@ static int f2fs_write_node_page(struct page *page,
                 return 0;
         }
  
-       if (wbc->for_reclaim) {
-               dec_page_count(sbi, F2FS_DIRTY_NODES);
-               wbc->pages_skipped++;
-               set_page_dirty(page);
-               return AOP_WRITEPAGE_ACTIVATE;
-       }
+       if (wbc->for_reclaim)
+               goto redirty_out;
  
         mutex_lock(&sbi->node_write);
         set_page_writeback(page);
@@ -1186,6 +1226,12 @@ static int f2fs_write_node_page(struct page *page,
         mutex_unlock(&sbi->node_write);
         unlock_page(page);
         return 0;
+
+redirty_out:
+       dec_page_count(sbi, F2FS_DIRTY_NODES);
+       wbc->pages_skipped++;
+       set_page_dirty(page);
+       return AOP_WRITEPAGE_ACTIVATE;
  }
  
  /*
@@ -1200,11 +1246,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
         struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
         long nr_to_write = wbc->nr_to_write;
  
-       /* First check balancing cached NAT entries */
-       if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
-               f2fs_sync_fs(sbi->sb, true);
-               return 0;
-       }
+       /* balancing f2fs's metadata in background */
+       f2fs_balance_fs_bg(sbi);
  
         /* collect a number of dirty node pages and write together */
         if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
@@ -1223,6 +1266,8 @@ static int f2fs_set_node_page_dirty(struct page *page)
         struct address_space *mapping = page->mapping;
         struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
  
+       trace_f2fs_set_page_dirty(page, NODE);
+
         SetPageUptodate(page);
         if (!PageDirty(page)) {
                 __set_page_dirty_nobuffers(page);
@@ -1291,23 +1336,18 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
         if (nid == 0)
                 return 0;
  
-       if (!build)
-               goto retry;
-
-       /* do not add allocated nids */
-       read_lock(&nm_i->nat_tree_lock);
-       ne = __lookup_nat_cache(nm_i, nid);
-       if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
-               allocated = true;
-       read_unlock(&nm_i->nat_tree_lock);
-       if (allocated)
-               return 0;
-retry:
-       i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
-       if (!i) {
-               cond_resched();
-               goto retry;
+       if (build) {
+               /* do not add allocated nids */
+               read_lock(&nm_i->nat_tree_lock);
+               ne = __lookup_nat_cache(nm_i, nid);
+               if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
+                       allocated = true;
+               read_unlock(&nm_i->nat_tree_lock);
+               if (allocated)
+                       return 0;
         }
+
+       i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
         i->nid = nid;
         i->state = NID_NEW;
  
@@ -1350,7 +1390,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
                         break;
  
                 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
-               BUG_ON(blk_addr == NEW_ADDR);
+               f2fs_bug_on(blk_addr == NEW_ADDR);
                 if (blk_addr == NULL_ADDR) {
                         if (add_free_nid(nm_i, start_nid, true) < 0)
                                 break;
@@ -1421,14 +1461,14 @@ retry:
  
         /* We should not use stale free nids created by build_free_nids */
         if (nm_i->fcnt && !sbi->on_build_free_nids) {
-               BUG_ON(list_empty(&nm_i->free_nid_list));
+               f2fs_bug_on(list_empty(&nm_i->free_nid_list));
                 list_for_each(this, &nm_i->free_nid_list) {
                         i = list_entry(this, struct free_nid, list);
                         if (i->state == NID_NEW)
                                 break;
                 }
  
-               BUG_ON(i->state != NID_NEW);
+               f2fs_bug_on(i->state != NID_NEW);
                 *nid = i->nid;
                 i->state = NID_ALLOC;
                 nm_i->fcnt--;
@@ -1439,9 +1479,9 @@ retry:
  
         /* Let's scan nat pages and its caches to get free nids */
         mutex_lock(&nm_i->build_lock);
-       sbi->on_build_free_nids = 1;
+       sbi->on_build_free_nids = true;
         build_free_nids(sbi);
-       sbi->on_build_free_nids = 0;
+       sbi->on_build_free_nids = false;
         mutex_unlock(&nm_i->build_lock);
         goto retry;
  }
@@ -1456,7 +1496,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
  
         spin_lock(&nm_i->free_nid_list_lock);
         i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
-       BUG_ON(!i || i->state != NID_ALLOC);
+       f2fs_bug_on(!i || i->state != NID_ALLOC);
         __del_from_free_nid_list(i);
         spin_unlock(&nm_i->free_nid_list_lock);
  }
@@ -1474,7 +1514,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
  
         spin_lock(&nm_i->free_nid_list_lock);
         i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
-       BUG_ON(!i || i->state != NID_ALLOC);
+       f2fs_bug_on(!i || i->state != NID_ALLOC);
         if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
                 __del_from_free_nid_list(i);
         } else {
@@ -1677,7 +1717,7 @@ to_nat_page:
                         nat_blk = page_address(page);
                 }
  
-               BUG_ON(!nat_blk);
+               f2fs_bug_on(!nat_blk);
                 raw_ne = nat_blk->entries[nid - start_nid];
  flush_now:
                 new_blkaddr = nat_get_blkaddr(ne);
@@ -1781,11 +1821,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
         /* destroy free nid list */
         spin_lock(&nm_i->free_nid_list_lock);
         list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
-               BUG_ON(i->state == NID_ALLOC);
+               f2fs_bug_on(i->state == NID_ALLOC);
                 __del_from_free_nid_list(i);
                 nm_i->fcnt--;
         }
-       BUG_ON(nm_i->fcnt);
+       f2fs_bug_on(nm_i->fcnt);
         spin_unlock(&nm_i->free_nid_list_lock);
  
         /* destroy nat cache */
@@ -1799,7 +1839,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
                         __del_from_nat_cache(nm_i, e);
                 }
         }
-       BUG_ON(nm_i->nat_cnt);
+       f2fs_bug_on(nm_i->nat_cnt);
         write_unlock(&nm_i->nat_tree_lock);
  
         kfree(nm_i->nat_bitmap);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c

index 51ef5ee..fdc8116 100644 (file)
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -64,24 +64,31 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
         name.name = raw_inode->i_name;
  retry:
         de = f2fs_find_entry(dir, &name, &page);
-       if (de && inode->i_ino == le32_to_cpu(de->ino)) {
-               kunmap(page);
-               f2fs_put_page(page, 0);
-               goto out;
-       }
+       if (de && inode->i_ino == le32_to_cpu(de->ino))
+               goto out_unmap_put;
         if (de) {
                 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
                 if (IS_ERR(einode)) {
                         WARN_ON(1);
                         if (PTR_ERR(einode) == -ENOENT)
                                 err = -EEXIST;
-                       goto out;
+                       goto out_unmap_put;
+               }
+               err = acquire_orphan_inode(F2FS_SB(inode->i_sb));
+               if (err) {
+                       iput(einode);
+                       goto out_unmap_put;
                 }
                 f2fs_delete_entry(de, page, einode);
                 iput(einode);
                 goto retry;
         }
         err = __f2fs_add_link(dir, &name, inode);
+       goto out;
+
+out_unmap_put:
+       kunmap(page);
+       f2fs_put_page(page, 0);
  out:
         f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: "
                         "ino = %x, name = %s, dir = %lx, err = %d",
@@ -285,7 +292,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
         struct f2fs_summary sum;
         struct node_info ni;
         int err = 0, recovered = 0;
-       int ilock;
  
         start = start_bidx_of_node(ofs_of_node(page), fi);
         if (IS_INODE(page))
@@ -293,20 +299,20 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
         else
                 end = start + ADDRS_PER_BLOCK;
  
-       ilock = mutex_lock_op(sbi);
+       f2fs_lock_op(sbi);
         set_new_dnode(&dn, inode, NULL, NULL, 0);
  
         err = get_dnode_of_data(&dn, start, ALLOC_NODE);
         if (err) {
-               mutex_unlock_op(sbi, ilock);
+               f2fs_unlock_op(sbi);
                 return err;
         }
  
         wait_on_page_writeback(dn.node_page);
  
         get_node_info(sbi, dn.nid, &ni);
-       BUG_ON(ni.ino != ino_of_node(page));
-       BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));
+       f2fs_bug_on(ni.ino != ino_of_node(page));
+       f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page));
  
         for (; start < end; start++) {
                 block_t src, dest;
@@ -316,9 +322,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
  
                 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
                         if (src == NULL_ADDR) {
-                               int err = reserve_new_block(&dn);
+                               err = reserve_new_block(&dn);
                                 /* We should not get -ENOSPC */
-                               BUG_ON(err);
+                               f2fs_bug_on(err);
                         }
  
                         /* Check the previous node page having this index */
@@ -349,7 +355,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
         recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
  err:
         f2fs_put_dnode(&dn);
-       mutex_unlock_op(sbi, ilock);
+       f2fs_unlock_op(sbi);
  
         f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
                         "recovered_data = %d blocks, err = %d",
@@ -419,6 +425,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
  {
         struct list_head inode_list;
         int err;
+       bool need_writecp = false;
  
         fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
                         sizeof(struct fsync_inode_entry), NULL);
@@ -428,7 +435,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
         INIT_LIST_HEAD(&inode_list);
  
         /* step #1: find fsynced inode numbers */
-       sbi->por_doing = 1;
+       sbi->por_doing = true;
         err = find_fsync_dnodes(sbi, &inode_list);
         if (err)
                 goto out;
@@ -436,14 +443,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
         if (list_empty(&inode_list))
                 goto out;
  
+       need_writecp = true;
+
         /* step #2: recover data */
         err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
-       BUG_ON(!list_empty(&inode_list));
+       f2fs_bug_on(!list_empty(&inode_list));
  out:
         destroy_fsync_dnodes(&inode_list);
         kmem_cache_destroy(fsync_entry_slab);
-       sbi->por_doing = 0;
-       if (!err)
+       sbi->por_doing = false;
+       if (!err && need_writecp)
                 write_checkpoint(sbi, false);
         return err;
  }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c

index 09af9c7..fa284d3 100644 (file)
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -36,6 +36,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
         }
  }
  
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
+{
+       /* check the # of cached NAT entries and prefree segments */
+       if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
+                               excess_prefree_segs(sbi))
+               f2fs_sync_fs(sbi->sb, true);
+}
+
  static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                 enum dirty_type dirty_type)
  {
@@ -50,20 +58,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
  
         if (dirty_type == DIRTY) {
                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
-               enum dirty_type t = DIRTY_HOT_DATA;
-
-               dirty_type = sentry->type;
-
-               if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
-                       dirty_i->nr_dirty[dirty_type]++;
+               enum dirty_type t = sentry->type;
  
-               /* Only one bitmap should be set */
-               for (; t <= DIRTY_COLD_NODE; t++) {
-                       if (t == dirty_type)
-                               continue;
-                       if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
-                               dirty_i->nr_dirty[t]--;
-               }
+               if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
+                       dirty_i->nr_dirty[t]++;
         }
  }
  
@@ -76,12 +74,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                 dirty_i->nr_dirty[dirty_type]--;
  
         if (dirty_type == DIRTY) {
-               enum dirty_type t = DIRTY_HOT_DATA;
+               struct seg_entry *sentry = get_seg_entry(sbi, segno);
+               enum dirty_type t = sentry->type;
  
-               /* clear all the bitmaps */
-               for (; t <= DIRTY_COLD_NODE; t++)
-                       if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
-                               dirty_i->nr_dirty[t]--;
+               if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
+                       dirty_i->nr_dirty[t]--;
  
                 if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
                         clear_bit(GET_SECNO(sbi, segno),
@@ -142,27 +139,33 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
  void clear_prefree_segments(struct f2fs_sb_info *sbi)
  {
         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-       unsigned int segno = -1;
+       unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
         unsigned int total_segs = TOTAL_SEGS(sbi);
+       unsigned int start = 0, end = -1;
  
         mutex_lock(&dirty_i->seglist_lock);
+
         while (1) {
-               segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
-                               segno + 1);
-               if (segno >= total_segs)
+               int i;
+               start = find_next_bit(prefree_map, total_segs, end + 1);
+               if (start >= total_segs)
                         break;
+               end = find_next_zero_bit(prefree_map, total_segs, start + 1);
+
+               for (i = start; i < end; i++)
+                       clear_bit(i, prefree_map);
  
-               if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
-                       dirty_i->nr_dirty[PRE]--;
-
-               /* Let's use trim */
-               if (test_opt(sbi, DISCARD))
-                       blkdev_issue_discard(sbi->sb->s_bdev,
-                                       START_BLOCK(sbi, segno) <<
-                                       sbi->log_sectors_per_block,
-                                       1 << (sbi->log_sectors_per_block +
-                                               sbi->log_blocks_per_seg),
-                                       GFP_NOFS, 0);
+               dirty_i->nr_dirty[PRE] -= end - start;
+
+               if (!test_opt(sbi, DISCARD))
+                       continue;
+
+               blkdev_issue_discard(sbi->sb->s_bdev,
+                               START_BLOCK(sbi, start) <<
+                               sbi->log_sectors_per_block,
+                               (1 << (sbi->log_sectors_per_block +
+                               sbi->log_blocks_per_seg)) * (end - start),
+                               GFP_NOFS, 0);
         }
         mutex_unlock(&dirty_i->seglist_lock);
  }
@@ -195,7 +198,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
         new_vblocks = se->valid_blocks + del;
         offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
  
-       BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
+       f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
                                 (new_vblocks > sbi->blocks_per_seg)));
  
         se->valid_blocks = new_vblocks;
@@ -235,7 +238,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
         unsigned int segno = GET_SEGNO(sbi, addr);
         struct sit_info *sit_i = SIT_I(sbi);
  
-       BUG_ON(addr == NULL_ADDR);
+       f2fs_bug_on(addr == NULL_ADDR);
         if (addr == NEW_ADDR)
                 return;
  
@@ -267,9 +270,8 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
   */
  int npages_for_summary_flush(struct f2fs_sb_info *sbi)
  {
-       int total_size_bytes = 0;
         int valid_sum_count = 0;
-       int i, sum_space;
+       int i, sum_in_page;
  
         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
                 if (sbi->ckpt->alloc_type[i] == SSR)
@@ -278,13 +280,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)
                         valid_sum_count += curseg_blkoff(sbi, i);
         }
  
-       total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
-                       + sizeof(struct nat_journal) + 2
-                       + sizeof(struct sit_journal) + 2;
-       sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
-       if (total_size_bytes < sum_space)
+       sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
+                       SUM_FOOTER_SIZE) / SUMMARY_SIZE;
+       if (valid_sum_count <= sum_in_page)
                 return 1;
-       else if (total_size_bytes < 2 * sum_space)
+       else if ((valid_sum_count - sum_in_page) <=
+               (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
                 return 2;
         return 3;
  }
@@ -350,7 +351,7 @@ find_other_zone:
                 if (dir == ALLOC_RIGHT) {
                         secno = find_next_zero_bit(free_i->free_secmap,
                                                         TOTAL_SECS(sbi), 0);
-                       BUG_ON(secno >= TOTAL_SECS(sbi));
+                       f2fs_bug_on(secno >= TOTAL_SECS(sbi));
                 } else {
                         go_left = 1;
                         left_start = hint - 1;
@@ -366,7 +367,7 @@ find_other_zone:
                 }
                 left_start = find_next_zero_bit(free_i->free_secmap,
                                                         TOTAL_SECS(sbi), 0);
-               BUG_ON(left_start >= TOTAL_SECS(sbi));
+               f2fs_bug_on(left_start >= TOTAL_SECS(sbi));
                 break;
         }
         secno = left_start;
@@ -405,7 +406,7 @@ skip_left:
         }
  got_it:
         /* set it as dirty segment in free segmap */
-       BUG_ON(test_bit(segno, free_i->free_segmap));
+       f2fs_bug_on(test_bit(segno, free_i->free_segmap));
         __set_inuse(sbi, segno);
         *newseg = segno;
         write_unlock(&free_i->segmap_lock);
@@ -550,9 +551,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
                 change_curseg(sbi, type, true);
         else
                 new_curseg(sbi, type, false);
-#ifdef CONFIG_F2FS_STAT_FS
-       sbi->segment_count[curseg->alloc_type]++;
-#endif
+
+       stat_inc_seg_type(sbi, curseg);
  }
  
  void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -597,6 +597,11 @@ static void f2fs_end_io_write(struct bio *bio, int err)
  
         if (p->is_sync)
                 complete(p->wait);
+
+       if (!get_pages(p->sbi, F2FS_WRITEBACK) &&
+                       !list_empty(&p->sbi->cp_wait.task_list))
+               wake_up(&p->sbi->cp_wait);
+
         kfree(p);
         bio_put(bio);
  }
@@ -657,6 +662,7 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
                                 block_t blk_addr, enum page_type type)
  {
         struct block_device *bdev = sbi->sb->s_bdev;
+       int bio_blocks;
  
         verify_block_addr(sbi, blk_addr);
  
@@ -676,7 +682,8 @@ retry:
                         goto retry;
                 }
  
-               sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi));
+               bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+               sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks);
                 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
                 sbi->bio[type]->bi_private = priv;
                 /*
@@ -771,7 +778,7 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
                 return __get_segment_type_4(page, p_type);
         }
         /* NR_CURSEG_TYPE(6) logs by default */
-       BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
+       f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE);
         return __get_segment_type_6(page, p_type);
  }
  
@@ -801,9 +808,8 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
  
         mutex_lock(&sit_i->sentry_lock);
         __refresh_next_blkoff(sbi, curseg);
-#ifdef CONFIG_F2FS_STAT_FS
-       sbi->block_count[curseg->alloc_type]++;
-#endif
+
+       stat_inc_block_count(sbi, curseg);
  
         /*
          * SIT information should be updated before segment allocation,
@@ -849,7 +855,7 @@ void write_data_page(struct inode *inode, struct page *page,
         struct f2fs_summary sum;
         struct node_info ni;
  
-       BUG_ON(old_blkaddr == NULL_ADDR);
+       f2fs_bug_on(old_blkaddr == NULL_ADDR);
         get_node_info(sbi, dn->nid, &ni);
         set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
  
@@ -1122,8 +1128,6 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
                                                 SUM_JOURNAL_SIZE);
         written_size += SUM_JOURNAL_SIZE;
  
-       set_page_dirty(page);
-
         /* Step 3: write summary entries */
         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
                 unsigned short blkoff;
@@ -1142,18 +1146,20 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
                         summary = (struct f2fs_summary *)(kaddr + written_size);
                         *summary = seg_i->sum_blk->entries[j];
                         written_size += SUMMARY_SIZE;
-                       set_page_dirty(page);
  
                         if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
                                                         SUM_FOOTER_SIZE)
                                 continue;
  
+                       set_page_dirty(page);
                         f2fs_put_page(page, 1);
                         page = NULL;
                 }
         }
-       if (page)
+       if (page) {
+               set_page_dirty(page);
                 f2fs_put_page(page, 1);
+       }
  }
  
  static void write_normal_summaries(struct f2fs_sb_info *sbi,
@@ -1239,7 +1245,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
         /* get current sit block page without lock */
         src_page = get_meta_page(sbi, src_off);
         dst_page = grab_meta_page(sbi, dst_off);
-       BUG_ON(PageDirty(src_page));
+       f2fs_bug_on(PageDirty(src_page));
  
         src_addr = page_address(src_page);
         dst_addr = page_address(dst_page);
@@ -1271,9 +1277,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
                         __mark_sit_entry_dirty(sbi, segno);
                 }
                 update_sits_in_cursum(sum, -sits_in_cursum(sum));
-               return 1;
+               return true;
         }
-       return 0;
+       return false;
  }
  
  /*
@@ -1637,6 +1643,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+       sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
  
         err = build_sit_info(sbi);
         if (err)
@@ -1744,6 +1751,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
  void destroy_segment_manager(struct f2fs_sb_info *sbi)
  {
         struct f2fs_sm_info *sm_info = SM_I(sbi);
+       if (!sm_info)
+               return;
         destroy_dirty_segmap(sbi);
         destroy_curseg(sbi);
         destroy_free_segmap(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h

index bdd10ea..269f690 100644 (file)
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,6 +14,8 @@
  #define NULL_SEGNO                     ((unsigned int)(~0))
  #define NULL_SECNO                     ((unsigned int)(~0))
  
+#define DEF_RECLAIM_PREFREE_SEGMENTS   100     /* 200MB of prefree segments */
+
  /* L: Logical segment # in volume, R: Relative segment # in main area */
  #define GET_L2R_SEGNO(free_i, segno)   (segno - free_i->start_segno)
  #define GET_R2L_SEGNO(free_i, segno)   (segno + free_i->start_segno)
@@ -90,6 +92,8 @@
         (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
  #define SECTOR_TO_BLOCK(sbi, sectors)                                  \
         (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
+#define MAX_BIO_BLOCKS(max_hw_blocks)                                  \
+       (min((int)max_hw_blocks, BIO_MAX_PAGES))
  
  /* during checkpoint, bio_private is used to synchronize the last bio */
  struct bio_private {
@@ -470,6 +474,11 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
                                                 reserved_sections(sbi)));
  }
  
+static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
+{
+       return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments);
+}
+
  static inline int utilization(struct f2fs_sb_info *sbi)
  {
         return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count);
@@ -513,16 +522,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
         return curseg->next_blkoff;
  }
  
+#ifdef CONFIG_F2FS_CHECK_FS
  static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
  {
         unsigned int end_segno = SM_I(sbi)->segment_count - 1;
         BUG_ON(segno > end_segno);
  }
  
-/*
- * This function is used for only debugging.
- * NOTE: In future, we have to remove this function.
- */
  static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
  {
         struct f2fs_sm_info *sm_info = SM_I(sbi);
@@ -541,8 +547,9 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
  {
         struct f2fs_sm_info *sm_info = SM_I(sbi);
         unsigned int end_segno = sm_info->segment_count - 1;
+       bool is_valid  = test_bit_le(0, raw_sit->valid_map) ? true : false;
         int valid_blocks = 0;
-       int i;
+       int cur_pos = 0, next_pos;
  
         /* check segment usage */
         BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
@@ -551,11 +558,26 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
         BUG_ON(segno > end_segno);
  
         /* check bitmap with valid block count */
-       for (i = 0; i < sbi->blocks_per_seg; i++)
-               if (f2fs_test_bit(i, raw_sit->valid_map))
-                       valid_blocks++;
+       do {
+               if (is_valid) {
+                       next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
+                                       sbi->blocks_per_seg,
+                                       cur_pos);
+                       valid_blocks += next_pos - cur_pos;
+               } else
+                       next_pos = find_next_bit_le(&raw_sit->valid_map,
+                                       sbi->blocks_per_seg,
+                                       cur_pos);
+               cur_pos = next_pos;
+               is_valid = !is_valid;
+       } while (cur_pos < sbi->blocks_per_seg);
         BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
  }
+#else
+#define check_seg_range(sbi, segno)
+#define verify_block_addr(sbi, blk_addr)
+#define check_block_count(sbi, segno, raw_sit)
+#endif
  
  static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
                                                 unsigned int start)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c

index 13d0a0f..bafff72 100644 (file)
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -43,7 +43,9 @@ enum {
         Opt_disable_roll_forward,
         Opt_discard,
         Opt_noheap,
+       Opt_user_xattr,
         Opt_nouser_xattr,
+       Opt_acl,
         Opt_noacl,
         Opt_active_logs,
         Opt_disable_ext_identify,
@@ -56,7 +58,9 @@ static match_table_t f2fs_tokens = {
         {Opt_disable_roll_forward, "disable_roll_forward"},
         {Opt_discard, "discard"},
         {Opt_noheap, "no_heap"},
+       {Opt_user_xattr, "user_xattr"},
         {Opt_nouser_xattr, "nouser_xattr"},
+       {Opt_acl, "acl"},
         {Opt_noacl, "noacl"},
         {Opt_active_logs, "active_logs=%u"},
         {Opt_disable_ext_identify, "disable_ext_identify"},
@@ -65,24 +69,40 @@ static match_table_t f2fs_tokens = {
  };
  
  /* Sysfs support for f2fs */
+enum {
+       GC_THREAD,      /* struct f2fs_gc_thread */
+       SM_INFO,        /* struct f2fs_sm_info */
+};
+
  struct f2fs_attr {
         struct attribute attr;
         ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
         ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
                          const char *, size_t);
+       int struct_type;
         int offset;
  };
  
+static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
+{
+       if (struct_type == GC_THREAD)
+               return (unsigned char *)sbi->gc_thread;
+       else if (struct_type == SM_INFO)
+               return (unsigned char *)SM_I(sbi);
+       return NULL;
+}
+
  static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
                         struct f2fs_sb_info *sbi, char *buf)
  {
-       struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
+       unsigned char *ptr = NULL;
         unsigned int *ui;
  
-       if (!gc_kth)
+       ptr = __struct_ptr(sbi, a->struct_type);
+       if (!ptr)
                 return -EINVAL;
  
-       ui = (unsigned int *)(((char *)gc_kth) + a->offset);
+       ui = (unsigned int *)(ptr + a->offset);
  
         return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
  }
@@ -91,15 +111,16 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
                         struct f2fs_sb_info *sbi,
                         const char *buf, size_t count)
  {
-       struct f2fs_gc_kthread *gc_kth = sbi->gc_thread;
+       unsigned char *ptr;
         unsigned long t;
         unsigned int *ui;
         ssize_t ret;
  
-       if (!gc_kth)
+       ptr = __struct_ptr(sbi, a->struct_type);
+       if (!ptr)
                 return -EINVAL;
  
-       ui = (unsigned int *)(((char *)gc_kth) + a->offset);
+       ui = (unsigned int *)(ptr + a->offset);
  
         ret = kstrtoul(skip_spaces(buf), 0, &t);
         if (ret < 0)
@@ -135,21 +156,25 @@ static void f2fs_sb_release(struct kobject *kobj)
         complete(&sbi->s_kobj_unregister);
  }
  
-#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \
+#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
  static struct f2fs_attr f2fs_attr_##_name = {                  \
         .attr = {.name = __stringify(_name), .mode = _mode },   \
         .show   = _show,                                        \
         .store  = _store,                                       \
-       .offset = offsetof(struct f2fs_gc_kthread, _elname),    \
+       .struct_type = _struct_type,                            \
+       .offset = _offset                                       \
  }
  
-#define F2FS_RW_ATTR(name, elname)     \
-       F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname)
+#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)   \
+       F2FS_ATTR_OFFSET(struct_type, name, 0644,               \
+               f2fs_sbi_show, f2fs_sbi_store,                  \
+               offsetof(struct struct_name, elname))
  
-F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time);
-F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time);
-F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(gc_idle, gc_idle);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
  
  #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
  static struct attribute *f2fs_attrs[] = {
@@ -157,6 +182,7 @@ static struct attribute *f2fs_attrs[] = {
         ATTR_LIST(gc_max_sleep_time),
         ATTR_LIST(gc_no_gc_sleep_time),
         ATTR_LIST(gc_idle),
+       ATTR_LIST(reclaim_segments),
         NULL,
  };
  
@@ -237,6 +263,9 @@ static int parse_options(struct super_block *sb, char *options)
                         set_opt(sbi, NOHEAP);
                         break;
  #ifdef CONFIG_F2FS_FS_XATTR
+               case Opt_user_xattr:
+                       set_opt(sbi, XATTR_USER);
+                       break;
                 case Opt_nouser_xattr:
                         clear_opt(sbi, XATTR_USER);
                         break;
@@ -244,6 +273,10 @@ static int parse_options(struct super_block *sb, char *options)
                         set_opt(sbi, INLINE_XATTR);
                         break;
  #else
+               case Opt_user_xattr:
+                       f2fs_msg(sb, KERN_INFO,
+                               "user_xattr options not supported");
+                       break;
                 case Opt_nouser_xattr:
                         f2fs_msg(sb, KERN_INFO,
                                 "nouser_xattr options not supported");
@@ -254,10 +287,16 @@ static int parse_options(struct super_block *sb, char *options)
                         break;
  #endif
  #ifdef CONFIG_F2FS_FS_POSIX_ACL
+               case Opt_acl:
+                       set_opt(sbi, POSIX_ACL);
+                       break;
                 case Opt_noacl:
                         clear_opt(sbi, POSIX_ACL);
                         break;
  #else
+               case Opt_acl:
+                       f2fs_msg(sb, KERN_INFO, "acl options not supported");
+                       break;
                 case Opt_noacl:
                         f2fs_msg(sb, KERN_INFO, "noacl options not supported");
                         break;
@@ -355,7 +394,9 @@ static void f2fs_put_super(struct super_block *sb)
         f2fs_destroy_stats(sbi);
         stop_gc_thread(sbi);
  
-       write_checkpoint(sbi, true);
+       /* We don't need to do checkpoint when it's clean */
+       if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
+               write_checkpoint(sbi, true);
  
         iput(sbi->node_inode);
         iput(sbi->meta_inode);
@@ -727,30 +768,47 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
                 atomic_set(&sbi->nr_pages[i], 0);
  }
  
-static int validate_superblock(struct super_block *sb,
-               struct f2fs_super_block **raw_super,
-               struct buffer_head **raw_super_buf, sector_t block)
+/*
+ * Read f2fs raw super block.
+ * Because we have two copies of super block, so read the first one at first,
+ * if the first one is invalid, move to read the second one.
+ */
+static int read_raw_super_block(struct super_block *sb,
+                       struct f2fs_super_block **raw_super,
+                       struct buffer_head **raw_super_buf)
  {
-       const char *super = (block == 0 ? "first" : "second");
+       int block = 0;
  
-       /* read f2fs raw super block */
+retry:
         *raw_super_buf = sb_bread(sb, block);
         if (!*raw_super_buf) {
-               f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
-                               super);
-               return -EIO;
+               f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
+                               block + 1);
+               if (block == 0) {
+                       block++;
+                       goto retry;
+               } else {
+                       return -EIO;
+               }
         }
  
         *raw_super = (struct f2fs_super_block *)
                 ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
  
         /* sanity checking of raw super */
-       if (!sanity_check_raw_super(sb, *raw_super))
-               return 0;
+       if (sanity_check_raw_super(sb, *raw_super)) {
+               brelse(*raw_super_buf);
+               f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
+                               "in %dth superblock", block + 1);
+               if(block == 0) {
+                       block++;
+                       goto retry;
+               } else {
+                       return -EINVAL;
+               }
+       }
  
-       f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
-                               "in %s superblock", super);
-       return -EINVAL;
+       return 0;
  }
  
  static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -760,7 +818,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
         struct buffer_head *raw_super_buf;
         struct inode *root;
         long err = -EINVAL;
-       int i;
  
         /* allocate memory for f2fs-specific super block info */
         sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
@@ -773,14 +830,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
                 goto free_sbi;
         }
  
-       err = validate_superblock(sb, &raw_super, &raw_super_buf, 0);
-       if (err) {
-               brelse(raw_super_buf);
-               /* check secondary superblock when primary failed */
-               err = validate_superblock(sb, &raw_super, &raw_super_buf, 1);
-               if (err)
-                       goto free_sb_buf;
-       }
+       err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
+       if (err)
+               goto free_sbi;
+
         sb->s_fs_info = sbi;
         /* init some FS parameters */
         sbi->active_logs = NR_CURSEG_TYPE;
@@ -818,12 +871,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
         mutex_init(&sbi->gc_mutex);
         mutex_init(&sbi->writepages);
         mutex_init(&sbi->cp_mutex);
-       for (i = 0; i < NR_GLOBAL_LOCKS; i++)
-               mutex_init(&sbi->fs_lock[i]);
         mutex_init(&sbi->node_write);
-       sbi->por_doing = 0;
+       sbi->por_doing = false;
         spin_lock_init(&sbi->stat_lock);
         init_rwsem(&sbi->bio_sem);
+       init_rwsem(&sbi->cp_rwsem);
+       init_waitqueue_head(&sbi->cp_wait);
         init_sb_info(sbi);
  
         /* get an inode for meta space */
@@ -922,12 +975,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
                 /* After POR, we can run background GC thread.*/
                 err = start_gc_thread(sbi);
                 if (err)
-                       goto fail;
+                       goto free_gc;
         }
  
         err = f2fs_build_stats(sbi);
         if (err)
-               goto fail;
+               goto free_gc;
  
         if (f2fs_proc_root)
                 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -953,6 +1006,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
  
         return 0;
  fail:
+       if (sbi->s_proc) {
+               remove_proc_entry("segment_info", sbi->s_proc);
+               remove_proc_entry(sb->s_id, f2fs_proc_root);
+       }
+       f2fs_destroy_stats(sbi);
+free_gc:
         stop_gc_thread(sbi);
  free_root_inode:
         dput(sb->s_root);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c

index 1ac8a5f..aa7a3f1 100644 (file)
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -154,6 +154,9 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
  }
  
  #ifdef CONFIG_F2FS_FS_SECURITY
+static int __f2fs_setxattr(struct inode *inode, int name_index,
+                       const char *name, const void *value, size_t value_len,
+                       struct page *ipage);
  static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
                 void *page)
  {
@@ -161,7 +164,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
         int err = 0;
  
         for (xattr = xattr_array; xattr->name != NULL; xattr++) {
-               err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
+               err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
                                 xattr->name, xattr->value,
                                 xattr->value_len, (struct page *)page);
                 if (err < 0)
@@ -369,7 +372,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
                         alloc_nid_failed(sbi, new_nid);
                         return PTR_ERR(xpage);
                 }
-               BUG_ON(new_nid);
+               f2fs_bug_on(new_nid);
         } else {
                 struct dnode_of_data dn;
                 set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -469,16 +472,15 @@ cleanup:
         return error;
  }
  
-int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
-                       const void *value, size_t value_len, struct page *ipage)
+static int __f2fs_setxattr(struct inode *inode, int name_index,
+                       const char *name, const void *value, size_t value_len,
+                       struct page *ipage)
  {
-       struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
         struct f2fs_inode_info *fi = F2FS_I(inode);
         struct f2fs_xattr_entry *here, *last;
         void *base_addr;
         int found, newsize;
         size_t name_len;
-       int ilock;
         __u32 new_hsize;
         int error = -ENOMEM;
  
@@ -493,10 +495,6 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
         if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
                 return -ERANGE;
  
-       f2fs_balance_fs(sbi);
-
-       ilock = mutex_lock_op(sbi);
-
         base_addr = read_all_xattrs(inode, ipage);
         if (!base_addr)
                 goto exit;
@@ -522,7 +520,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
                  */
                 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
                 if (found)
-                       free = free - ENTRY_SIZE(here);
+                       free = free + ENTRY_SIZE(here);
  
                 if (free < newsize) {
                         error = -ENOSPC;
@@ -578,7 +576,21 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
         else
                 update_inode_page(inode);
  exit:
-       mutex_unlock_op(sbi, ilock);
         kzfree(base_addr);
         return error;
  }
+
+int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
+                       const void *value, size_t value_len, struct page *ipage)
+{
+       struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+       int err;
+
+       f2fs_balance_fs(sbi);
+
+       f2fs_lock_op(sbi);
+       err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
+       f2fs_unlock_op(sbi);
+
+       return err;
+}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h

index 4241e6f..7c31f4b 100644 (file)
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -102,6 +102,7 @@ struct msdos_sb_info {
         struct hlist_head dir_hashtable[FAT_HASH_SIZE];
  
         unsigned int dirty;           /* fs state before mount */
+       struct rcu_head rcu;
  };
  
  #define FAT_CACHE_VALID        0       /* special case for valid cache */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c

index 0062da2..854b578 100644 (file)
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -548,6 +548,16 @@ static void fat_set_state(struct super_block *sb,
         brelse(bh);
  }
  
+static void delayed_free(struct rcu_head *p)
+{
+       struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
+       unload_nls(sbi->nls_disk);
+       unload_nls(sbi->nls_io);
+       if (sbi->options.iocharset != fat_default_iocharset)
+               kfree(sbi->options.iocharset);
+       kfree(sbi);
+}
+
  static void fat_put_super(struct super_block *sb)
  {
         struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -557,14 +567,7 @@ static void fat_put_super(struct super_block *sb)
         iput(sbi->fsinfo_inode);
         iput(sbi->fat_inode);
  
-       unload_nls(sbi->nls_disk);
-       unload_nls(sbi->nls_io);
-
-       if (sbi->options.iocharset != fat_default_iocharset)
-               kfree(sbi->options.iocharset);
-
-       sb->s_fs_info = NULL;
-       kfree(sbi);
+       call_rcu(&sbi->rcu, delayed_free);
  }
  
  static struct kmem_cache *fat_inode_cachep;
diff --git a/fs/fcntl.c b/fs/fcntl.c

index 65343c3..ef68665 100644 (file)
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -56,7 +56,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
                                 return -EINVAL;
         }
  
-       if (filp->f_op && filp->f_op->check_flags)
+       if (filp->f_op->check_flags)
                 error = filp->f_op->check_flags(arg);
         if (error)
                 return error;
@@ -64,8 +64,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
         /*
          * ->fasync() is responsible for setting the FASYNC bit.
          */
-       if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
-                       filp->f_op->fasync) {
+       if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
                 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
                 if (error < 0)
                         goto out;
diff --git a/fs/file_table.c b/fs/file_table.c

index e900ca5..5fff903 100644 (file)
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,8 +36,6 @@ struct files_stat_struct files_stat = {
         .max_files = NR_FILE
  };
  
-DEFINE_STATIC_LGLOCK(files_lglock);
-
  /* SLAB cache for file structures */
  static struct kmem_cache *filp_cachep __read_mostly;
  
@@ -134,7 +132,6 @@ struct file *get_empty_filp(void)
                 return ERR_PTR(error);
         }
  
-       INIT_LIST_HEAD(&f->f_u.fu_list);
         atomic_long_set(&f->f_count, 1);
         rwlock_init(&f->f_owner.lock);
         spin_lock_init(&f->f_lock);
@@ -240,11 +237,11 @@ static void __fput(struct file *file)
         locks_remove_flock(file);
  
         if (unlikely(file->f_flags & FASYNC)) {
-               if (file->f_op && file->f_op->fasync)
+               if (file->f_op->fasync)
                         file->f_op->fasync(-1, file, 0);
         }
         ima_file_free(file);
-       if (file->f_op && file->f_op->release)
+       if (file->f_op->release)
                 file->f_op->release(inode, file);
         security_file_free(file);
         if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
@@ -304,7 +301,6 @@ void fput(struct file *file)
         if (atomic_long_dec_and_test(&file->f_count)) {
                 struct task_struct *task = current;
  
-               file_sb_list_del(file);
                 if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
                         init_task_work(&file->f_u.fu_rcuhead, ____fput);
                         if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
@@ -333,7 +329,6 @@ void __fput_sync(struct file *file)
  {
         if (atomic_long_dec_and_test(&file->f_count)) {
                 struct task_struct *task = current;
-               file_sb_list_del(file);
                 BUG_ON(!(task->flags & PF_KTHREAD));
                 __fput(file);
         }
@@ -345,129 +340,10 @@ void put_filp(struct file *file)
  {
         if (atomic_long_dec_and_test(&file->f_count)) {
                 security_file_free(file);
-               file_sb_list_del(file);
                 file_free(file);
         }
  }
  
-static inline int file_list_cpu(struct file *file)
-{
-#ifdef CONFIG_SMP
-       return file->f_sb_list_cpu;
-#else
-       return smp_processor_id();
-#endif
-}
-
-/* helper for file_sb_list_add to reduce ifdefs */
-static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
-{
-       struct list_head *list;
-#ifdef CONFIG_SMP
-       int cpu;
-       cpu = smp_processor_id();
-       file->f_sb_list_cpu = cpu;
-       list = per_cpu_ptr(sb->s_files, cpu);
-#else
-       list = &sb->s_files;
-#endif
-       list_add(&file->f_u.fu_list, list);
-}
-
-/**
- * file_sb_list_add - add a file to the sb's file list
- * @file: file to add
- * @sb: sb to add it to
- *
- * Use this function to associate a file with the superblock of the inode it
- * refers to.
- */
-void file_sb_list_add(struct file *file, struct super_block *sb)
-{
-       if (likely(!(file->f_mode & FMODE_WRITE)))
-               return;
-       if (!S_ISREG(file_inode(file)->i_mode))
-               return;
-       lg_local_lock(&files_lglock);
-       __file_sb_list_add(file, sb);
-       lg_local_unlock(&files_lglock);
-}
-
-/**
- * file_sb_list_del - remove a file from the sb's file list
- * @file: file to remove
- * @sb: sb to remove it from
- *
- * Use this function to remove a file from its superblock.
- */
-void file_sb_list_del(struct file *file)
-{
-       if (!list_empty(&file->f_u.fu_list)) {
-               lg_local_lock_cpu(&files_lglock, file_list_cpu(file));
-               list_del_init(&file->f_u.fu_list);
-               lg_local_unlock_cpu(&files_lglock, file_list_cpu(file));
-       }
-}
-
-#ifdef CONFIG_SMP
-
-/*
- * These macros iterate all files on all CPUs for a given superblock.
- * files_lglock must be held globally.
- */
-#define do_file_list_for_each_entry(__sb, __file)              \
-{                                                              \
-       int i;                                                  \
-       for_each_possible_cpu(i) {                              \
-               struct list_head *list;                         \
-               list = per_cpu_ptr((__sb)->s_files, i);         \
-               list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry                         \
-       }                                                       \
-}
-
-#else
-
-#define do_file_list_for_each_entry(__sb, __file)              \
-{                                                              \
-       struct list_head *list;                                 \
-       list = &(sb)->s_files;                                  \
-       list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry                         \
-}
-
-#endif
-
-/**
- *     mark_files_ro - mark all files read-only
- *     @sb: superblock in question
- *
- *     All files are marked read-only.  We don't care about pending
- *     delete files so this should be used in 'force' mode only.
- */
-void mark_files_ro(struct super_block *sb)
-{
-       struct file *f;
-
-       lg_global_lock(&files_lglock);
-       do_file_list_for_each_entry(sb, f) {
-               if (!file_count(f))
-                       continue;
-               if (!(f->f_mode & FMODE_WRITE))
-                       continue;
-               spin_lock(&f->f_lock);
-               f->f_mode &= ~FMODE_WRITE;
-               spin_unlock(&f->f_lock);
-               if (file_check_writeable(f) != 0)
-                       continue;
-               __mnt_drop_write(f->f_path.mnt);
-               file_release_write(f);
-       } while_file_list_for_each_entry;
-       lg_global_unlock(&files_lglock);
-}
-
  void __init files_init(unsigned long mempages)
  { 
         unsigned long n;
@@ -483,6 +359,5 @@ void __init files_init(unsigned long mempages)
         n = (mempages * (PAGE_SIZE / 1024)) / 10;
         files_stat.max_files = max_t(unsigned long, n, NR_FILE);
         files_defer_init();
-       lg_lock_init(&files_lglock, "files_lglock");
         percpu_counter_init(&nr_files, 0);
  } 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index 4afdbd6..1f4a10e 100644 (file)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,6 +26,7 @@
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include <linux/tracepoint.h>
+#include <linux/device.h>
  #include "internal.h"
  
  /*
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c

index adbfd66..b96a49b 100644 (file)
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -473,7 +473,7 @@ err:
  static void cuse_fc_release(struct fuse_conn *fc)
  {
         struct cuse_conn *cc = fc_to_cc(fc);
-       kfree(cc);
+       kfree_rcu(cc, fc.rcu);
  }
  
  /**
@@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = {
  ATTRIBUTE_GROUPS(cuse_class_dev);
  
  static struct miscdevice cuse_miscdev = {
-       .minor          = MISC_DYNAMIC_MINOR,
+       .minor          = CUSE_MINOR,
         .name           = "cuse",
         .fops           = &cuse_channel_fops,
  };
  
+MODULE_ALIAS_MISCDEV(CUSE_MINOR);
+MODULE_ALIAS("devname:cuse");
+
  static int __init cuse_init(void)
  {
         int i, rc;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c

index b7989f2..c3eb2c4 100644 (file)
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -342,24 +342,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
         return err;
  }
  
-static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
-                                             struct inode *inode)
-{
-       struct dentry *newent;
-
-       if (inode && S_ISDIR(inode->i_mode)) {
-               struct fuse_conn *fc = get_fuse_conn(inode);
-
-               mutex_lock(&fc->inst_mutex);
-               newent = d_materialise_unique(dentry, inode);
-               mutex_unlock(&fc->inst_mutex);
-       } else {
-               newent = d_materialise_unique(dentry, inode);
-       }
-
-       return newent;
-}
-
  static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
                                   unsigned int flags)
  {
@@ -382,7 +364,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
         if (inode && get_node_id(inode) == FUSE_ROOT_ID)
                 goto out_iput;
  
-       newent = fuse_materialise_dentry(entry, inode);
+       newent = d_materialise_unique(entry, inode);
         err = PTR_ERR(newent);
         if (IS_ERR(newent))
                 goto out_err;
@@ -601,21 +583,9 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
         }
         kfree(forget);
  
-       if (S_ISDIR(inode->i_mode)) {
-               struct dentry *alias;
-               mutex_lock(&fc->inst_mutex);
-               alias = d_find_alias(inode);
-               if (alias) {
-                       /* New directory must have moved since mkdir */
-                       mutex_unlock(&fc->inst_mutex);
-                       dput(alias);
-                       iput(inode);
-                       return -EBUSY;
-               }
-               d_instantiate(entry, inode);
-               mutex_unlock(&fc->inst_mutex);
-       } else
-               d_instantiate(entry, inode);
+       err = d_instantiate_no_diralias(entry, inode);
+       if (err)
+               return err;
  
         fuse_change_entry_timeout(entry, &outarg);
         fuse_invalidate_attr(dir);
@@ -1284,7 +1254,7 @@ static int fuse_direntplus_link(struct file *file,
         if (!inode)
                 goto out;
  
-       alias = fuse_materialise_dentry(dentry, inode);
+       alias = d_materialise_unique(dentry, inode);
         err = PTR_ERR(alias);
         if (IS_ERR(alias))
                 goto out;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c

index 4598345..7e70506 100644 (file)
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
  
                 BUG_ON(req->inode != inode);
                 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
-               if (curr_index == index) {
+               if (curr_index <= index &&
+                   index < curr_index + req->num_pages) {
                         found = true;
                         break;
                 }
@@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
  
  static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
  {
-       __free_page(req->pages[0]);
-       fuse_file_put(req->ff, false);
+       int i;
+
+       for (i = 0; i < req->num_pages; i++)
+               __free_page(req->pages[i]);
+
+       if (req->ff)
+               fuse_file_put(req->ff, false);
  }
  
  static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
         struct inode *inode = req->inode;
         struct fuse_inode *fi = get_fuse_inode(inode);
         struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+       int i;
  
         list_del(&req->writepages_entry);
-       dec_bdi_stat(bdi, BDI_WRITEBACK);
-       dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
-       bdi_writeout_inc(bdi);
+       for (i = 0; i < req->num_pages; i++) {
+               dec_bdi_stat(bdi, BDI_WRITEBACK);
+               dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+               bdi_writeout_inc(bdi);
+       }
         wake_up(&fi->page_waitq);
  }
  
  /* Called under fc->lock, may release and reacquire it */
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
+                               loff_t size)
  __releases(fc->lock)
  __acquires(fc->lock)
  {
         struct fuse_inode *fi = get_fuse_inode(req->inode);
-       loff_t size = i_size_read(req->inode);
         struct fuse_write_in *inarg = &req->misc.write.in;
+       __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
  
         if (!fc->connected)
                 goto out_free;
  
-       if (inarg->offset + PAGE_CACHE_SIZE <= size) {
-               inarg->size = PAGE_CACHE_SIZE;
+       if (inarg->offset + data_size <= size) {
+               inarg->size = data_size;
         } else if (inarg->offset < size) {
-               inarg->size = size & (PAGE_CACHE_SIZE - 1);
+               inarg->size = size - inarg->offset;
         } else {
                 /* Got truncated off completely */
                 goto out_free;
@@ -1472,12 +1482,13 @@ __acquires(fc->lock)
  {
         struct fuse_conn *fc = get_fuse_conn(inode);
         struct fuse_inode *fi = get_fuse_inode(inode);
+       size_t crop = i_size_read(inode);
         struct fuse_req *req;
  
         while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
                 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
                 list_del_init(&req->list);
-               fuse_send_writepage(fc, req);
+               fuse_send_writepage(fc, req, crop);
         }
  }
  
@@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
  
         mapping_set_error(inode->i_mapping, req->out.h.error);
         spin_lock(&fc->lock);
+       while (req->misc.write.next) {
+               struct fuse_conn *fc = get_fuse_conn(inode);
+               struct fuse_write_in *inarg = &req->misc.write.in;
+               struct fuse_req *next = req->misc.write.next;
+               req->misc.write.next = next->misc.write.next;
+               next->misc.write.next = NULL;
+               next->ff = fuse_file_get(req->ff);
+               list_add(&next->writepages_entry, &fi->writepages);
+
+               /*
+                * Skip fuse_flush_writepages() to make it easy to crop requests
+                * based on primary request size.
+                *
+                * 1st case (trivial): there are no concurrent activities using
+                * fuse_set/release_nowrite.  Then we're on safe side because
+                * fuse_flush_writepages() would call fuse_send_writepage()
+                * anyway.
+                *
+                * 2nd case: someone called fuse_set_nowrite and it is waiting
+                * now for completion of all in-flight requests.  This happens
+                * rarely and no more than once per page, so this should be
+                * okay.
+                *
+                * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
+                * of fuse_set_nowrite..fuse_release_nowrite section.  The fact
+                * that fuse_set_nowrite returned implies that all in-flight
+                * requests were completed along with all of their secondary
+                * requests.  Further primary requests are blocked by negative
+                * writectr.  Hence there cannot be any in-flight requests and
+                * no invocations of fuse_writepage_end() while we're in
+                * fuse_set_nowrite..fuse_release_nowrite section.
+                */
+               fuse_send_writepage(fc, next, inarg->offset + inarg->size);
+       }
         fi->writectr--;
         fuse_writepage_finish(fc, req);
         spin_unlock(&fc->lock);
         fuse_writepage_free(fc, req);
  }
  
+static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
+                                            struct fuse_inode *fi)
+{
+       struct fuse_file *ff = NULL;
+
+       spin_lock(&fc->lock);
+       if (!WARN_ON(list_empty(&fi->write_files))) {
+               ff = list_entry(fi->write_files.next, struct fuse_file,
+                               write_entry);
+               fuse_file_get(ff);
+       }
+       spin_unlock(&fc->lock);
+
+       return ff;
+}
+
  static int fuse_writepage_locked(struct page *page)
  {
         struct address_space *mapping = page->mapping;
@@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page)
         struct fuse_conn *fc = get_fuse_conn(inode);
         struct fuse_inode *fi = get_fuse_inode(inode);
         struct fuse_req *req;
-       struct fuse_file *ff;
         struct page *tmp_page;
+       int error = -ENOMEM;
  
         set_page_writeback(page);
  
@@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page)
         if (!tmp_page)
                 goto err_free;
  
-       spin_lock(&fc->lock);
-       BUG_ON(list_empty(&fi->write_files));
-       ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
-       req->ff = fuse_file_get(ff);
-       spin_unlock(&fc->lock);
+       error = -EIO;
+       req->ff = fuse_write_file_get(fc, fi);
+       if (!req->ff)
+               goto err_free;
  
-       fuse_write_fill(req, ff, page_offset(page), 0);
+       fuse_write_fill(req, req->ff, page_offset(page), 0);
  
         copy_highpage(tmp_page, page);
         req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+       req->misc.write.next = NULL;
         req->in.argpages = 1;
         req->num_pages = 1;
         req->pages[0] = tmp_page;
@@ -1550,19 +1611,263 @@ err_free:
         fuse_request_free(req);
  err:
         end_page_writeback(page);
-       return -ENOMEM;
+       return error;
  }
  
  static int fuse_writepage(struct page *page, struct writeback_control *wbc)
  {
         int err;
  
+       if (fuse_page_is_writeback(page->mapping->host, page->index)) {
+               /*
+                * ->writepages() should be called for sync() and friends.  We
+                * should only get here on direct reclaim and then we are
+                * allowed to skip a page which is already in flight
+                */
+               WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
+
+               redirty_page_for_writepage(wbc, page);
+               return 0;
+       }
+
         err = fuse_writepage_locked(page);
         unlock_page(page);
  
         return err;
  }
  
+struct fuse_fill_wb_data {
+       struct fuse_req *req;
+       struct fuse_file *ff;
+       struct inode *inode;
+       struct page **orig_pages;
+};
+
+static void fuse_writepages_send(struct fuse_fill_wb_data *data)
+{
+       struct fuse_req *req = data->req;
+       struct inode *inode = data->inode;
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       struct fuse_inode *fi = get_fuse_inode(inode);
+       int num_pages = req->num_pages;
+       int i;
+
+       req->ff = fuse_file_get(data->ff);
+       spin_lock(&fc->lock);
+       list_add_tail(&req->list, &fi->queued_writes);
+       fuse_flush_writepages(inode);
+       spin_unlock(&fc->lock);
+
+       for (i = 0; i < num_pages; i++)
+               end_page_writeback(data->orig_pages[i]);
+}
+
+static bool fuse_writepage_in_flight(struct fuse_req *new_req,
+                                    struct page *page)
+{
+       struct fuse_conn *fc = get_fuse_conn(new_req->inode);
+       struct fuse_inode *fi = get_fuse_inode(new_req->inode);
+       struct fuse_req *tmp;
+       struct fuse_req *old_req;
+       bool found = false;
+       pgoff_t curr_index;
+
+       BUG_ON(new_req->num_pages != 0);
+
+       spin_lock(&fc->lock);
+       list_del(&new_req->writepages_entry);
+       list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
+               BUG_ON(old_req->inode != new_req->inode);
+               curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+               if (curr_index <= page->index &&
+                   page->index < curr_index + old_req->num_pages) {
+                       found = true;
+                       break;
+               }
+       }
+       if (!found) {
+               list_add(&new_req->writepages_entry, &fi->writepages);
+               goto out_unlock;
+       }
+
+       new_req->num_pages = 1;
+       for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
+               BUG_ON(tmp->inode != new_req->inode);
+               curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+               if (tmp->num_pages == 1 &&
+                   curr_index == page->index) {
+                       old_req = tmp;
+               }
+       }
+
+       if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
+                                       old_req->state == FUSE_REQ_PENDING)) {
+               struct backing_dev_info *bdi = page->mapping->backing_dev_info;
+
+               copy_highpage(old_req->pages[0], page);
+               spin_unlock(&fc->lock);
+
+               dec_bdi_stat(bdi, BDI_WRITEBACK);
+               dec_zone_page_state(page, NR_WRITEBACK_TEMP);
+               bdi_writeout_inc(bdi);
+               fuse_writepage_free(fc, new_req);
+               fuse_request_free(new_req);
+               goto out;
+       } else {
+               new_req->misc.write.next = old_req->misc.write.next;
+               old_req->misc.write.next = new_req;
+       }
+out_unlock:
+       spin_unlock(&fc->lock);
+out:
+       return found;
+}
+
+static int fuse_writepages_fill(struct page *page,
+               struct writeback_control *wbc, void *_data)
+{
+       struct fuse_fill_wb_data *data = _data;
+       struct fuse_req *req = data->req;
+       struct inode *inode = data->inode;
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       struct page *tmp_page;
+       bool is_writeback;
+       int err;
+
+       if (!data->ff) {
+               err = -EIO;
+               data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
+               if (!data->ff)
+                       goto out_unlock;
+       }
+
+       /*
+        * Being under writeback is unlikely but possible.  For example direct
+        * read to an mmaped fuse file will set the page dirty twice; once when
+        * the pages are faulted with get_user_pages(), and then after the read
+        * completed.
+        */
+       is_writeback = fuse_page_is_writeback(inode, page->index);
+
+       if (req && req->num_pages &&
+           (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+            (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+            data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
+               fuse_writepages_send(data);
+               data->req = NULL;
+       }
+       err = -ENOMEM;
+       tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+       if (!tmp_page)
+               goto out_unlock;
+
+       /*
+        * The page must not be redirtied until the writeout is completed
+        * (i.e. userspace has sent a reply to the write request).  Otherwise
+        * there could be more than one temporary page instance for each real
+        * page.
+        *
+        * This is ensured by holding the page lock in page_mkwrite() while
+        * checking fuse_page_is_writeback().  We already hold the page lock
+        * since clear_page_dirty_for_io() and keep it held until we add the
+        * request to the fi->writepages list and increment req->num_pages.
+        * After this fuse_page_is_writeback() will indicate that the page is
+        * under writeback, so we can release the page lock.
+        */
+       if (data->req == NULL) {
+               struct fuse_inode *fi = get_fuse_inode(inode);
+
+               err = -ENOMEM;
+               req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
+               if (!req) {
+                       __free_page(tmp_page);
+                       goto out_unlock;
+               }
+
+               fuse_write_fill(req, data->ff, page_offset(page), 0);
+               req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+               req->misc.write.next = NULL;
+               req->in.argpages = 1;
+               req->background = 1;
+               req->num_pages = 0;
+               req->end = fuse_writepage_end;
+               req->inode = inode;
+
+               spin_lock(&fc->lock);
+               list_add(&req->writepages_entry, &fi->writepages);
+               spin_unlock(&fc->lock);
+
+               data->req = req;
+       }
+       set_page_writeback(page);
+
+       copy_highpage(tmp_page, page);
+       req->pages[req->num_pages] = tmp_page;
+       req->page_descs[req->num_pages].offset = 0;
+       req->page_descs[req->num_pages].length = PAGE_SIZE;
+
+       inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
+       inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+
+       err = 0;
+       if (is_writeback && fuse_writepage_in_flight(req, page)) {
+               end_page_writeback(page);
+               data->req = NULL;
+               goto out_unlock;
+       }
+       data->orig_pages[req->num_pages] = page;
+
+       /*
+        * Protected by fc->lock against concurrent access by
+        * fuse_page_is_writeback().
+        */
+       spin_lock(&fc->lock);
+       req->num_pages++;
+       spin_unlock(&fc->lock);
+
+out_unlock:
+       unlock_page(page);
+
+       return err;
+}
+
+static int fuse_writepages(struct address_space *mapping,
+                          struct writeback_control *wbc)
+{
+       struct inode *inode = mapping->host;
+       struct fuse_fill_wb_data data;
+       int err;
+
+       err = -EIO;
+       if (is_bad_inode(inode))
+               goto out;
+
+       data.inode = inode;
+       data.req = NULL;
+       data.ff = NULL;
+
+       err = -ENOMEM;
+       data.orig_pages = kzalloc(sizeof(struct page *) *
+                                 FUSE_MAX_PAGES_PER_REQ,
+                                 GFP_NOFS);
+       if (!data.orig_pages)
+               goto out;
+
+       err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
+       if (data.req) {
+               /* Ignore errors if we can write at least one page */
+               BUG_ON(!data.req->num_pages);
+               fuse_writepages_send(&data);
+               err = 0;
+       }
+       if (data.ff)
+               fuse_file_put(data.ff, false);
+
+       kfree(data.orig_pages);
+out:
+       return err;
+}
+
  static int fuse_launder_page(struct page *page)
  {
         int err = 0;
@@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
  static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
  {
         struct page *page = vmf->page;
-       /*
-        * Don't use page->mapping as it may become NULL from a
-        * concurrent truncate.
-        */
-       struct inode *inode = vma->vm_file->f_mapping->host;
+       struct inode *inode = file_inode(vma->vm_file);
+
+       file_update_time(vma->vm_file);
+       lock_page(page);
+       if (page->mapping != inode->i_mapping) {
+               unlock_page(page);
+               return VM_FAULT_NOPAGE;
+       }
  
         fuse_wait_on_page_writeback(inode, page->index);
-       return 0;
+       return VM_FAULT_LOCKED;
  }
  
  static const struct vm_operations_struct fuse_file_vm_ops = {
@@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
  static const struct address_space_operations fuse_file_aops  = {
         .readpage       = fuse_readpage,
         .writepage      = fuse_writepage,
+       .writepages     = fuse_writepages,
         .launder_page   = fuse_launder_page,
         .readpages      = fuse_readpages,
         .set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h

index 5b9e6f3..7d27309 100644 (file)
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -321,6 +321,7 @@ struct fuse_req {
                 struct {
                         struct fuse_write_in in;
                         struct fuse_write_out out;
+                       struct fuse_req *next;
                 } write;
                 struct fuse_notify_retrieve_in retrieve_in;
                 struct fuse_lk_in lk_in;
@@ -374,12 +375,11 @@ struct fuse_conn {
         /** Lock protecting accessess to  members of this structure */
         spinlock_t lock;
  
-       /** Mutex protecting against directory alias creation */
-       struct mutex inst_mutex;
-
         /** Refcount */
         atomic_t count;
  
+       struct rcu_head rcu;
+
         /** The user id for this mount */
         kuid_t user_id;
  
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index a8ce6da..d468643 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -565,7 +565,6 @@ void fuse_conn_init(struct fuse_conn *fc)
  {
         memset(fc, 0, sizeof(*fc));
         spin_lock_init(&fc->lock);
-       mutex_init(&fc->inst_mutex);
         init_rwsem(&fc->killsb);
         atomic_set(&fc->count, 1);
         init_waitqueue_head(&fc->waitq);
@@ -596,7 +595,6 @@ void fuse_conn_put(struct fuse_conn *fc)
         if (atomic_dec_and_test(&fc->count)) {
                 if (fc->destroy_req)
                         fuse_request_free(fc->destroy_req);
-               mutex_destroy(&fc->inst_mutex);
                 fc->release(fc);
         }
  }
@@ -920,7 +918,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
  
  static void fuse_free_conn(struct fuse_conn *fc)
  {
-       kfree(fc);
+       kfree_rcu(fc, rcu);
  }
  
  static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c

index 109ce93..1615df1 100644 (file)
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1514,13 +1514,6 @@ out:
         return NULL;
  }
  
-static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-       char *s = nd_get_link(nd);
-       if (!IS_ERR(s))
-               kfree(s);
-}
-
  /**
   * gfs2_permission -
   * @inode: The inode
@@ -1872,7 +1865,7 @@ const struct inode_operations gfs2_dir_iops = {
  const struct inode_operations gfs2_symlink_iops = {
         .readlink = generic_readlink,
         .follow_link = gfs2_follow_link,
-       .put_link = gfs2_put_link,
+       .put_link = kfree_put_link,
         .permission = gfs2_permission,
         .setattr = gfs2_setattr,
         .getattr = gfs2_getattr,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h

index 1b39863..6797bf8 100644 (file)
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -80,6 +80,7 @@ struct hpfs_sb_info {
         unsigned sb_c_bitmap;           /* current bitmap */
         unsigned sb_max_fwd_alloc;      /* max forwad allocation */
         int sb_timeshift;
+       struct rcu_head rcu;
  };
  
  /* Four 512-byte buffers and the 2k block obtained by concatenating them */
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c

index 345713d..1b39afd 100644 (file)
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -407,7 +407,7 @@ again:
                         /*printk("HPFS: truncating file before delete.\n");*/
                         newattrs.ia_size = 0;
                         newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-                       err = notify_change(dentry, &newattrs);
+                       err = notify_change(dentry, &newattrs, NULL);
                         put_write_access(inode);
                         if (!err)
                                 goto again;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c

index 4334cda..b8d01ef 100644 (file)
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -101,18 +101,24 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2,
         return 0;
  }
  
-static void hpfs_put_super(struct super_block *s)
+static void free_sbi(struct hpfs_sb_info *sbi)
  {
-       struct hpfs_sb_info *sbi = hpfs_sb(s);
+       kfree(sbi->sb_cp_table);
+       kfree(sbi->sb_bmp_dir);
+       kfree(sbi);
+}
  
+static void lazy_free_sbi(struct rcu_head *rcu)
+{
+       free_sbi(container_of(rcu, struct hpfs_sb_info, rcu));
+}
+
+static void hpfs_put_super(struct super_block *s)
+{
         hpfs_lock(s);
         unmark_dirty(s);
         hpfs_unlock(s);
-
-       kfree(sbi->sb_cp_table);
-       kfree(sbi->sb_bmp_dir);
-       s->s_fs_info = NULL;
-       kfree(sbi);
+       call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi);
  }
  
  unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -485,9 +491,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
         }
         s->s_fs_info = sbi;
  
-       sbi->sb_bmp_dir = NULL;
-       sbi->sb_cp_table = NULL;
-
         mutex_init(&sbi->hpfs_mutex);
         hpfs_lock(s);
  
@@ -679,10 +682,7 @@ bail2:     brelse(bh0);
  bail1:
  bail0:
         hpfs_unlock(s);
-       kfree(sbi->sb_bmp_dir);
-       kfree(sbi->sb_cp_table);
-       s->s_fs_info = NULL;
-       kfree(sbi);
+       free_sbi(sbi);
         return -EINVAL;
  }
  
diff --git a/fs/inode.c b/fs/inode.c

index b33ba8e..4bcdad3 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -773,15 +773,11 @@ static struct inode *find_inode(struct super_block *sb,
  
  repeat:
         hlist_for_each_entry(inode, head, i_hash) {
-               spin_lock(&inode->i_lock);
-               if (inode->i_sb != sb) {
-                       spin_unlock(&inode->i_lock);
+               if (inode->i_sb != sb)
                         continue;
-               }
-               if (!test(inode, data)) {
-                       spin_unlock(&inode->i_lock);
+               if (!test(inode, data))
                         continue;
-               }
+               spin_lock(&inode->i_lock);
                 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
                         __wait_on_freeing_inode(inode);
                         goto repeat;
@@ -804,15 +800,11 @@ static struct inode *find_inode_fast(struct super_block *sb,
  
  repeat:
         hlist_for_each_entry(inode, head, i_hash) {
-               spin_lock(&inode->i_lock);
-               if (inode->i_ino != ino) {
-                       spin_unlock(&inode->i_lock);
+               if (inode->i_ino != ino)
                         continue;
-               }
-               if (inode->i_sb != sb) {
-                       spin_unlock(&inode->i_lock);
+               if (inode->i_sb != sb)
                         continue;
-               }
+               spin_lock(&inode->i_lock);
                 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
                         __wait_on_freeing_inode(inode);
                         goto repeat;
@@ -950,6 +942,42 @@ void unlock_new_inode(struct inode *inode)
  }
  EXPORT_SYMBOL(unlock_new_inode);
  
+/**
+ * lock_two_nondirectories - take two i_mutexes on non-directory objects
+ * @inode1: first inode to lock
+ * @inode2: second inode to lock (may be NULL or the same as @inode1)
+ */
+void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
+{
+       WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
+       if (inode1 == inode2 || !inode2) {
+               mutex_lock(&inode1->i_mutex);
+               return;
+       }
+       WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
+       if (inode1 < inode2) {
+               mutex_lock(&inode1->i_mutex);
+               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
+       } else {
+               mutex_lock(&inode2->i_mutex);
+               mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2);
+       }
+}
+EXPORT_SYMBOL(lock_two_nondirectories);
+
+/**
+ * unlock_two_nondirectories - release locks from lock_two_nondirectories()
+ * @inode1: first inode to unlock
+ * @inode2: second inode to unlock (may be NULL or the same as @inode1)
+ */
+void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
+{
+       mutex_unlock(&inode1->i_mutex);
+       if (inode2 && inode2 != inode1)
+               mutex_unlock(&inode2->i_mutex);
+}
+EXPORT_SYMBOL(unlock_two_nondirectories);
+
  /**
   * iget5_locked - obtain an inode from a mounted file system
   * @sb:                super block of file system
@@ -1575,7 +1603,11 @@ static int __remove_suid(struct dentry *dentry, int kill)
         struct iattr newattrs;
  
         newattrs.ia_valid = ATTR_FORCE | kill;
-       return notify_change(dentry, &newattrs);
+       /*
+        * Note we call this on write, so notify_change will not
+        * encounter any conflicting delegations:
+        */
+       return notify_change(dentry, &newattrs, NULL);
  }
  
  int file_remove_suid(struct file *file)
diff --git a/fs/internal.h b/fs/internal.h

index 513e0d8..4657424 100644 (file)
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,8 +9,6 @@
   * 2 of the License, or (at your option) any later version.
   */
  
-#include <linux/lglock.h>
-
  struct super_block;
  struct file_system_type;
  struct linux_binprm;
@@ -62,8 +60,6 @@ extern int sb_prepare_remount_readonly(struct super_block *);
  
  extern void __init mnt_init(void);
  
-extern struct lglock vfsmount_lock;
-
  extern int __mnt_want_write(struct vfsmount *);
  extern int __mnt_want_write_file(struct file *);
  extern void __mnt_drop_write(struct vfsmount *);
@@ -77,9 +73,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
  /*
   * file_table.c
   */
-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
-extern void mark_files_ro(struct super_block *);
  extern struct file *get_empty_filp(void);
  
  /*
diff --git a/fs/ioctl.c b/fs/ioctl.c

index fd507fb..8ac3fad 100644 (file)
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -37,7 +37,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
  {
         int error = -ENOTTY;
  
-       if (!filp->f_op || !filp->f_op->unlocked_ioctl)
+       if (!filp->f_op->unlocked_ioctl)
                 goto out;
  
         error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
@@ -501,7 +501,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
  
         /* Did FASYNC state change ? */
         if ((flag ^ filp->f_flags) & FASYNC) {
-               if (filp->f_op && filp->f_op->fasync)
+               if (filp->f_op->fasync)
                         /* fasync() adjusts filp->f_flags */
                         error = filp->f_op->fasync(fd, filp, on);
                 else
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c

index e5d408a..4a9e10e 100644 (file)
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -181,7 +181,7 @@ struct iso9660_options{
   * Compute the hash for the isofs name corresponding to the dentry.
   */
  static int
-isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hash_common(struct qstr *qstr, int ms)
  {
         const char *name;
         int len;
@@ -202,7 +202,7 @@ isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
   * Compute the hash for the isofs name corresponding to the dentry.
   */
  static int
-isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hashi_common(struct qstr *qstr, int ms)
  {
         const char *name;
         int len;
@@ -259,13 +259,13 @@ static int isofs_dentry_cmp_common(
  static int
  isofs_hash(const struct dentry *dentry, struct qstr *qstr)
  {
-       return isofs_hash_common(dentry, qstr, 0);
+       return isofs_hash_common(qstr, 0);
  }
  
  static int
  isofs_hashi(const struct dentry *dentry, struct qstr *qstr)
  {
-       return isofs_hashi_common(dentry, qstr, 0);
+       return isofs_hashi_common(qstr, 0);
  }
  
  static int
@@ -286,13 +286,13 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
  static int
  isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
  {
-       return isofs_hash_common(dentry, qstr, 1);
+       return isofs_hash_common(qstr, 1);
  }
  
  static int
  isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr)
  {
-       return isofs_hashi_common(dentry, qstr, 1);
+       return isofs_hashi_common(qstr, 1);
  }
  
  static int
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c

index be0c39b..aa603e0 100644 (file)
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,7 +26,6 @@
  #include <linux/mm.h>
  #include <linux/highmem.h>
  #include <linux/hrtimer.h>
-#include <linux/backing-dev.h>
  
  static void __journal_temp_unlink_buffer(struct journal_head *jh);
  
@@ -100,10 +99,11 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
  
  alloc_transaction:
         if (!journal->j_running_transaction) {
-               new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
+               new_transaction = kzalloc(sizeof(*new_transaction),
+                                               GFP_NOFS|__GFP_NOFAIL);
                 if (!new_transaction) {
-                       congestion_wait(BLK_RW_ASYNC, HZ/50);
-                       goto alloc_transaction;
+                       ret = -ENOMEM;
+                       goto out;
                 }
         }
  
diff --git a/fs/libfs.c b/fs/libfs.c

index 3a3a9b5..5de0694 100644 (file)
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -10,6 +10,7 @@
  #include <linux/vfs.h>
  #include <linux/quotaops.h>
  #include <linux/mutex.h>
+#include <linux/namei.h>
  #include <linux/exportfs.h>
  #include <linux/writeback.h>
  #include <linux/buffer_head.h> /* sync_mapping_buffers */
@@ -31,6 +32,7 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
         stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
         return 0;
  }
+EXPORT_SYMBOL(simple_getattr);
  
  int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
@@ -39,6 +41,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
         buf->f_namelen = NAME_MAX;
         return 0;
  }
+EXPORT_SYMBOL(simple_statfs);
  
  /*
   * Retaining negative dentries for an in-memory filesystem just wastes
@@ -66,6 +69,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned
         d_add(dentry, NULL);
         return NULL;
  }
+EXPORT_SYMBOL(simple_lookup);
  
  int dcache_dir_open(struct inode *inode, struct file *file)
  {
@@ -75,12 +79,14 @@ int dcache_dir_open(struct inode *inode, struct file *file)
  
         return file->private_data ? 0 : -ENOMEM;
  }
+EXPORT_SYMBOL(dcache_dir_open);
  
  int dcache_dir_close(struct inode *inode, struct file *file)
  {
         dput(file->private_data);
         return 0;
  }
+EXPORT_SYMBOL(dcache_dir_close);
  
  loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
  {
@@ -123,6 +129,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
         mutex_unlock(&dentry->d_inode->i_mutex);
         return offset;
  }
+EXPORT_SYMBOL(dcache_dir_lseek);
  
  /* Relationship between i_mode and the DT_xxx types */
  static inline unsigned char dt_type(struct inode *inode)
@@ -172,11 +179,13 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
         spin_unlock(&dentry->d_lock);
         return 0;
  }
+EXPORT_SYMBOL(dcache_readdir);
  
  ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
  {
         return -EISDIR;
  }
+EXPORT_SYMBOL(generic_read_dir);
  
  const struct file_operations simple_dir_operations = {
         .open           = dcache_dir_open,
@@ -186,10 +195,12 @@ const struct file_operations simple_dir_operations = {
         .iterate        = dcache_readdir,
         .fsync          = noop_fsync,
  };
+EXPORT_SYMBOL(simple_dir_operations);
  
  const struct inode_operations simple_dir_inode_operations = {
         .lookup         = simple_lookup,
  };
+EXPORT_SYMBOL(simple_dir_inode_operations);
  
  static const struct super_operations simple_super_operations = {
         .statfs         = simple_statfs,
@@ -244,6 +255,7 @@ Enomem:
         deactivate_locked_super(s);
         return ERR_PTR(-ENOMEM);
  }
+EXPORT_SYMBOL(mount_pseudo);
  
  int simple_open(struct inode *inode, struct file *file)
  {
@@ -251,6 +263,7 @@ int simple_open(struct inode *inode, struct file *file)
                 file->private_data = inode->i_private;
         return 0;
  }
+EXPORT_SYMBOL(simple_open);
  
  int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
@@ -263,6 +276,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
         d_instantiate(dentry, inode);
         return 0;
  }
+EXPORT_SYMBOL(simple_link);
  
  int simple_empty(struct dentry *dentry)
  {
@@ -283,6 +297,7 @@ out:
         spin_unlock(&dentry->d_lock);
         return ret;
  }
+EXPORT_SYMBOL(simple_empty);
  
  int simple_unlink(struct inode *dir, struct dentry *dentry)
  {
@@ -293,6 +308,7 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
         dput(dentry);
         return 0;
  }
+EXPORT_SYMBOL(simple_unlink);
  
  int simple_rmdir(struct inode *dir, struct dentry *dentry)
  {
@@ -304,6 +320,7 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
         drop_nlink(dir);
         return 0;
  }
+EXPORT_SYMBOL(simple_rmdir);
  
  int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
                 struct inode *new_dir, struct dentry *new_dentry)
@@ -330,6 +347,7 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
  
         return 0;
  }
+EXPORT_SYMBOL(simple_rename);
  
  /**
   * simple_setattr - setattr for simple filesystem
@@ -370,6 +388,7 @@ int simple_readpage(struct file *file, struct page *page)
         unlock_page(page);
         return 0;
  }
+EXPORT_SYMBOL(simple_readpage);
  
  int simple_write_begin(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned flags,
@@ -393,6 +412,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
         }
         return 0;
  }
+EXPORT_SYMBOL(simple_write_begin);
  
  /**
   * simple_write_end - .write_end helper for non-block-device FSes
@@ -444,6 +464,7 @@ int simple_write_end(struct file *file, struct address_space *mapping,
  
         return copied;
  }
+EXPORT_SYMBOL(simple_write_end);
  
  /*
   * the inodes created here are not hashed. If you use iunique to generate
@@ -512,6 +533,7 @@ out:
         dput(root);
         return -ENOMEM;
  }
+EXPORT_SYMBOL(simple_fill_super);
  
  static DEFINE_SPINLOCK(pin_fs_lock);
  
@@ -534,6 +556,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
         mntput(mnt);
         return 0;
  }
+EXPORT_SYMBOL(simple_pin_fs);
  
  void simple_release_fs(struct vfsmount **mount, int *count)
  {
@@ -545,6 +568,7 @@ void simple_release_fs(struct vfsmount **mount, int *count)
         spin_unlock(&pin_fs_lock);
         mntput(mnt);
  }
+EXPORT_SYMBOL(simple_release_fs);
  
  /**
   * simple_read_from_buffer - copy data from the buffer to user space
@@ -579,6 +603,7 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
         *ppos = pos + count;
         return count;
  }
+EXPORT_SYMBOL(simple_read_from_buffer);
  
  /**
   * simple_write_to_buffer - copy data from user space to the buffer
@@ -613,6 +638,7 @@ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
         *ppos = pos + count;
         return count;
  }
+EXPORT_SYMBOL(simple_write_to_buffer);
  
  /**
   * memory_read_from_buffer - copy data from the buffer
@@ -644,6 +670,7 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
  
         return count;
  }
+EXPORT_SYMBOL(memory_read_from_buffer);
  
  /*
   * Transaction based IO.
@@ -665,6 +692,7 @@ void simple_transaction_set(struct file *file, size_t n)
         smp_mb();
         ar->size = n;
  }
+EXPORT_SYMBOL(simple_transaction_set);
  
  char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
  {
@@ -696,6 +724,7 @@ char *simple_transaction_get(struct file *file, const char __user *buf, size_t s
  
         return ar->data;
  }
+EXPORT_SYMBOL(simple_transaction_get);
  
  ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
  {
@@ -705,12 +734,14 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size
                 return 0;
         return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
  }
+EXPORT_SYMBOL(simple_transaction_read);
  
  int simple_transaction_release(struct inode *inode, struct file *file)
  {
         free_page((unsigned long)file->private_data);
         return 0;
  }
+EXPORT_SYMBOL(simple_transaction_release);
  
  /* Simple attribute files */
  
@@ -746,12 +777,14 @@ int simple_attr_open(struct inode *inode, struct file *file,
  
         return nonseekable_open(inode, file);
  }
+EXPORT_SYMBOL_GPL(simple_attr_open);
  
  int simple_attr_release(struct inode *inode, struct file *file)
  {
         kfree(file->private_data);
         return 0;
  }
+EXPORT_SYMBOL_GPL(simple_attr_release);        /* GPL-only?  This?  Really? */
  
  /* read from the buffer that is filled with the get function */
  ssize_t simple_attr_read(struct file *file, char __user *buf,
@@ -787,6 +820,7 @@ out:
         mutex_unlock(&attr->mutex);
         return ret;
  }
+EXPORT_SYMBOL_GPL(simple_attr_read);
  
  /* interpret the buffer as a number to call the set function with */
  ssize_t simple_attr_write(struct file *file, const char __user *buf,
@@ -819,6 +853,7 @@ out:
         mutex_unlock(&attr->mutex);
         return ret;
  }
+EXPORT_SYMBOL_GPL(simple_attr_write);
  
  /**
   * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
@@ -957,39 +992,56 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  {
         return 0;
  }
-
-EXPORT_SYMBOL(dcache_dir_close);
-EXPORT_SYMBOL(dcache_dir_lseek);
-EXPORT_SYMBOL(dcache_dir_open);
-EXPORT_SYMBOL(dcache_readdir);
-EXPORT_SYMBOL(generic_read_dir);
-EXPORT_SYMBOL(mount_pseudo);
-EXPORT_SYMBOL(simple_write_begin);
-EXPORT_SYMBOL(simple_write_end);
-EXPORT_SYMBOL(simple_dir_inode_operations);
-EXPORT_SYMBOL(simple_dir_operations);
-EXPORT_SYMBOL(simple_empty);
-EXPORT_SYMBOL(simple_fill_super);
-EXPORT_SYMBOL(simple_getattr);
-EXPORT_SYMBOL(simple_open);
-EXPORT_SYMBOL(simple_link);
-EXPORT_SYMBOL(simple_lookup);
-EXPORT_SYMBOL(simple_pin_fs);
-EXPORT_SYMBOL(simple_readpage);
-EXPORT_SYMBOL(simple_release_fs);
-EXPORT_SYMBOL(simple_rename);
-EXPORT_SYMBOL(simple_rmdir);
-EXPORT_SYMBOL(simple_statfs);
  EXPORT_SYMBOL(noop_fsync);
-EXPORT_SYMBOL(simple_unlink);
-EXPORT_SYMBOL(simple_read_from_buffer);
-EXPORT_SYMBOL(simple_write_to_buffer);
-EXPORT_SYMBOL(memory_read_from_buffer);
-EXPORT_SYMBOL(simple_transaction_set);
-EXPORT_SYMBOL(simple_transaction_get);
-EXPORT_SYMBOL(simple_transaction_read);
-EXPORT_SYMBOL(simple_transaction_release);
-EXPORT_SYMBOL_GPL(simple_attr_open);
-EXPORT_SYMBOL_GPL(simple_attr_release);
-EXPORT_SYMBOL_GPL(simple_attr_read);
-EXPORT_SYMBOL_GPL(simple_attr_write);
+
+void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
+                               void *cookie)
+{
+       char *s = nd_get_link(nd);
+       if (!IS_ERR(s))
+               kfree(s);
+}
+EXPORT_SYMBOL(kfree_put_link);
+
+/*
+ * nop .set_page_dirty method so that people can use .page_mkwrite on
+ * anon inodes.
+ */
+static int anon_set_page_dirty(struct page *page)
+{
+       return 0;
+}
+
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+struct inode *alloc_anon_inode(struct super_block *s)
+{
+       static const struct address_space_operations anon_aops = {
+               .set_page_dirty = anon_set_page_dirty,
+       };
+       struct inode *inode = new_inode_pseudo(s);
+
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+
+       inode->i_ino = get_next_ino();
+       inode->i_mapping->a_ops = &anon_aops;
+
+       /*
+        * Mark the inode dirty from the very beginning,
+        * that way it will never be moved to the dirty
+        * list because mark_inode_dirty() will think
+        * that it already _is_ on the dirty list.
+        */
+       inode->i_state = I_DIRTY;
+       inode->i_mode = S_IRUSR | S_IWUSR;
+       inode->i_uid = current_fsuid();
+       inode->i_gid = current_fsgid();
+       inode->i_flags |= S_PRIVATE;
+       inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+       return inode;
+}
+EXPORT_SYMBOL(alloc_anon_inode);
diff --git a/fs/locks.c b/fs/locks.c

index b27a300..f99d52b 100644 (file)
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -134,7 +134,7 @@
  
  #define IS_POSIX(fl)   (fl->fl_flags & FL_POSIX)
  #define IS_FLOCK(fl)   (fl->fl_flags & FL_FLOCK)
-#define IS_LEASE(fl)   (fl->fl_flags & FL_LEASE)
+#define IS_LEASE(fl)   (fl->fl_flags & (FL_LEASE|FL_DELEG))
  
  static bool lease_breaking(struct file_lock *fl)
  {
@@ -1292,28 +1292,40 @@ static void time_out_leases(struct inode *inode)
         }
  }
  
+static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
+{
+       if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
+               return false;
+       return locks_conflict(breaker, lease);
+}
+
  /**
   *     __break_lease   -       revoke all outstanding leases on file
   *     @inode: the inode of the file to return
- *     @mode: the open mode (read or write)
+ *     @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
+ *         break all leases
+ *     @type: FL_LEASE: break leases and delegations; FL_DELEG: break
+ *         only delegations
   *
   *     break_lease (inlined for speed) has checked there already is at least
   *     some kind of lock (maybe a lease) on this file.  Leases are broken on
   *     a call to open() or truncate().  This function can sleep unless you
   *     specified %O_NONBLOCK to your open().
   */
-int __break_lease(struct inode *inode, unsigned int mode)
+int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
  {
         int error = 0;
         struct file_lock *new_fl, *flock;
         struct file_lock *fl;
         unsigned long break_time;
         int i_have_this_lease = 0;
+       bool lease_conflict = false;
         int want_write = (mode & O_ACCMODE) != O_RDONLY;
  
         new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
         if (IS_ERR(new_fl))
                 return PTR_ERR(new_fl);
+       new_fl->fl_flags = type;
  
         spin_lock(&inode->i_lock);
  
@@ -1323,13 +1335,16 @@ int __break_lease(struct inode *inode, unsigned int mode)
         if ((flock == NULL) || !IS_LEASE(flock))
                 goto out;
  
-       if (!locks_conflict(flock, new_fl))
+       for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
+               if (leases_conflict(fl, new_fl)) {
+                       lease_conflict = true;
+                       if (fl->fl_owner == current->files)
+                               i_have_this_lease = 1;
+               }
+       }
+       if (!lease_conflict)
                 goto out;
  
-       for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
-               if (fl->fl_owner == current->files)
-                       i_have_this_lease = 1;
-
         break_time = 0;
         if (lease_break_time > 0) {
                 break_time = jiffies + lease_break_time * HZ;
@@ -1338,6 +1353,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
         }
  
         for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
+               if (!leases_conflict(fl, new_fl))
+                       continue;
                 if (want_write) {
                         if (fl->fl_flags & FL_UNLOCK_PENDING)
                                 continue;
@@ -1379,7 +1396,7 @@ restart:
                  */
                 for (flock = inode->i_flock; flock && IS_LEASE(flock);
                                 flock = flock->fl_next) {
-                       if (locks_conflict(new_fl, flock))
+                       if (leases_conflict(flock, new_fl))
                                 goto restart;
                 }
                 error = 0;
@@ -1460,9 +1477,26 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
         struct file_lock *fl, **before, **my_before = NULL, *lease;
         struct dentry *dentry = filp->f_path.dentry;
         struct inode *inode = dentry->d_inode;
+       bool is_deleg = (*flp)->fl_flags & FL_DELEG;
         int error;
  
         lease = *flp;
+       /*
+        * In the delegation case we need mutual exclusion with
+        * a number of operations that take the i_mutex.  We trylock
+        * because delegations are an optional optimization, and if
+        * there's some chance of a conflict--we'd rather not
+        * bother, maybe that's a sign this just isn't a good file to
+        * hand out a delegation on.
+        */
+       if (is_deleg && !mutex_trylock(&inode->i_mutex))
+               return -EAGAIN;
+
+       error = -EINVAL;
+       if (WARN_ON_ONCE(is_deleg && arg == F_WRLCK)) {
+               /* Write delegations are not currently supported: */
+               goto out;       /* "out" drops the i_mutex taken above */
+       }
  
         error = -EAGAIN;
         if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
@@ -1514,9 +1548,10 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
                 goto out;
  
         locks_insert_lock(before, lease);
-       return 0;
-
+       error = 0;
  out:
+       if (is_deleg)
+               mutex_unlock(&inode->i_mutex);
         return error;
  }
  
@@ -1579,7 +1614,7 @@ EXPORT_SYMBOL(generic_setlease);
  
  static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
  {
-       if (filp->f_op && filp->f_op->setlease)
+       if (filp->f_op->setlease)
                 return filp->f_op->setlease(filp, arg, lease);
         else
                 return generic_setlease(filp, arg, lease);
@@ -1771,7 +1806,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
         if (error)
                 goto out_free;
  
-       if (f.file->f_op && f.file->f_op->flock)
+       if (f.file->f_op->flock)
                 error = f.file->f_op->flock(f.file,
                                           (can_sleep) ? F_SETLKW : F_SETLK,
                                           lock);
@@ -1797,7 +1832,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
   */
  int vfs_test_lock(struct file *filp, struct file_lock *fl)
  {
-       if (filp->f_op && filp->f_op->lock)
+       if (filp->f_op->lock)
                 return filp->f_op->lock(filp, F_GETLK, fl);
         posix_test_lock(filp, fl);
         return 0;
@@ -1909,7 +1944,7 @@ out:
   */
  int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
  {
-       if (filp->f_op && filp->f_op->lock)
+       if (filp->f_op->lock)
                 return filp->f_op->lock(filp, cmd, fl);
         else
                 return posix_lock_file(filp, fl, conf);
@@ -2182,7 +2217,7 @@ void locks_remove_flock(struct file *filp)
         if (!inode->i_flock)
                 return;
  
-       if (filp->f_op && filp->f_op->flock) {
+       if (filp->f_op->flock) {
                 struct file_lock fl = {
                         .fl_pid = current->tgid,
                         .fl_file = filp,
@@ -2246,7 +2281,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
   */
  int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
  {
-       if (filp->f_op && filp->f_op->lock)
+       if (filp->f_op->lock)
                 return filp->f_op->lock(filp, F_CANCELLK, fl);
         return 0;
  }
diff --git a/fs/mount.h b/fs/mount.h

index 64a8581..d64c594 100644 (file)
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -29,6 +29,7 @@ struct mount {
         struct mount *mnt_parent;
         struct dentry *mnt_mountpoint;
         struct vfsmount mnt;
+       struct rcu_head mnt_rcu;
  #ifdef CONFIG_SMP
         struct mnt_pcp __percpu *mnt_pcp;
  #else
@@ -55,7 +56,7 @@ struct mount {
         int mnt_group_id;               /* peer group identifier */
         int mnt_expiry_mark;            /* true if marked for expiry */
         int mnt_pinned;
-       int mnt_ghosts;
+       struct path mnt_ex_mountpoint;
  };
  
  #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
@@ -76,13 +77,28 @@ static inline int is_mounted(struct vfsmount *mnt)
         return !IS_ERR_OR_NULL(real_mount(mnt));
  }
  
-extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
+extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
+extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
+
+extern bool legitimize_mnt(struct vfsmount *, unsigned);
  
  static inline void get_mnt_ns(struct mnt_namespace *ns)
  {
         atomic_inc(&ns->count);
  }
  
+extern seqlock_t mount_lock;
+
+static inline void lock_mount_hash(void)
+{
+       write_seqlock(&mount_lock);
+}
+
+static inline void unlock_mount_hash(void)
+{
+       write_sequnlock(&mount_lock);
+}
+
  struct proc_mounts {
         struct seq_file m;
         struct mnt_namespace *ns;
diff --git a/fs/namei.c b/fs/namei.c

index caa2805..e029a4c 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put);
   * to restart the path walk from the beginning in ref-walk mode.
   */
  
-static inline void lock_rcu_walk(void)
-{
-       br_read_lock(&vfsmount_lock);
-       rcu_read_lock();
-}
-
-static inline void unlock_rcu_walk(void)
-{
-       rcu_read_unlock();
-       br_read_unlock(&vfsmount_lock);
-}
-
  /**
   * unlazy_walk - try to switch to ref-walk mode.
   * @nd: nameidata pathwalk data
@@ -512,26 +500,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
         BUG_ON(!(nd->flags & LOOKUP_RCU));
  
         /*
-        * Get a reference to the parent first: we're
-        * going to make "path_put(nd->path)" valid in
-        * non-RCU context for "terminate_walk()".
-        *
-        * If this doesn't work, return immediately with
-        * RCU walking still active (and then we will do
-        * the RCU walk cleanup in terminate_walk()).
+        * After legitimizing the bastards, terminate_walk()
+        * will do the right thing for non-RCU mode, and all our
+        * subsequent exit cases should rcu_read_unlock()
+        * before returning.  Do vfsmount first; if dentry
+        * can't be legitimized, just set nd->path.dentry to NULL
+        * and rely on dput(NULL) being a no-op.
          */
-       if (!lockref_get_not_dead(&parent->d_lockref))
+       if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
                 return -ECHILD;
-
-       /*
-        * After the mntget(), we terminate_walk() will do
-        * the right thing for non-RCU mode, and all our
-        * subsequent exit cases should unlock_rcu_walk()
-        * before returning.
-        */
-       mntget(nd->path.mnt);
         nd->flags &= ~LOOKUP_RCU;
  
+       if (!lockref_get_not_dead(&parent->d_lockref)) {
+               nd->path.dentry = NULL; 
+               rcu_read_unlock();
+               return -ECHILD;
+       }
+
         /*
          * For a negative lookup, the lookup sequence point is the parents
          * sequence point, and it only needs to revalidate the parent dentry.
@@ -566,17 +551,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
                 spin_unlock(&fs->lock);
         }
  
-       unlock_rcu_walk();
+       rcu_read_unlock();
         return 0;
  
  unlock_and_drop_dentry:
         spin_unlock(&fs->lock);
  drop_dentry:
-       unlock_rcu_walk();
+       rcu_read_unlock();
         dput(dentry);
         goto drop_root_mnt;
  out:
-       unlock_rcu_walk();
+       rcu_read_unlock();
  drop_root_mnt:
         if (!(nd->flags & LOOKUP_ROOT))
                 nd->root.mnt = NULL;
@@ -608,17 +593,22 @@ static int complete_walk(struct nameidata *nd)
                 if (!(nd->flags & LOOKUP_ROOT))
                         nd->root.mnt = NULL;
  
+               if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
+                       rcu_read_unlock();
+                       return -ECHILD;
+               }
                 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
-                       unlock_rcu_walk();
+                       rcu_read_unlock();
+                       mntput(nd->path.mnt);
                         return -ECHILD;
                 }
                 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
-                       unlock_rcu_walk();
+                       rcu_read_unlock();
                         dput(dentry);
+                       mntput(nd->path.mnt);
                         return -ECHILD;
                 }
-               mntget(nd->path.mnt);
-               unlock_rcu_walk();
+               rcu_read_unlock();
         }
  
         if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -909,15 +899,15 @@ int follow_up(struct path *path)
         struct mount *parent;
         struct dentry *mountpoint;
  
-       br_read_lock(&vfsmount_lock);
+       read_seqlock_excl(&mount_lock);
         parent = mnt->mnt_parent;
         if (parent == mnt) {
-               br_read_unlock(&vfsmount_lock);
+               read_sequnlock_excl(&mount_lock);
                 return 0;
         }
         mntget(&parent->mnt);
         mountpoint = dget(mnt->mnt_mountpoint);
-       br_read_unlock(&vfsmount_lock);
+       read_sequnlock_excl(&mount_lock);
         dput(path->dentry);
         path->dentry = mountpoint;
         mntput(path->mnt);
@@ -1048,8 +1038,8 @@ static int follow_managed(struct path *path, unsigned flags)
  
                         /* Something is mounted on this dentry in another
                          * namespace and/or whatever was mounted there in this
-                        * namespace got unmounted before we managed to get the
-                        * vfsmount_lock */
+                        * namespace got unmounted before lookup_mnt() could
+                        * get it */
                 }
  
                 /* Handle an automount point */
@@ -1111,7 +1101,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
                 if (!d_mountpoint(path->dentry))
                         break;
  
-               mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+               mounted = __lookup_mnt(path->mnt, path->dentry);
                 if (!mounted)
                         break;
                 path->mnt = &mounted->mnt;
@@ -1132,7 +1122,7 @@ static void follow_mount_rcu(struct nameidata *nd)
  {
         while (d_mountpoint(nd->path.dentry)) {
                 struct mount *mounted;
-               mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
+               mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
                 if (!mounted)
                         break;
                 nd->path.mnt = &mounted->mnt;
@@ -1174,7 +1164,7 @@ failed:
         nd->flags &= ~LOOKUP_RCU;
         if (!(nd->flags & LOOKUP_ROOT))
                 nd->root.mnt = NULL;
-       unlock_rcu_walk();
+       rcu_read_unlock();
         return -ECHILD;
  }
  
@@ -1308,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
  }
  
  /*
- * Call i_op->lookup on the dentry.  The dentry must be negative but may be
- * hashed if it was pouplated with DCACHE_NEED_LOOKUP.
+ * Call i_op->lookup on the dentry.  The dentry must be negative and
+ * unhashed.
   *
   * dir->d_inode->i_mutex must be held
   */
@@ -1501,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd)
                 nd->flags &= ~LOOKUP_RCU;
                 if (!(nd->flags & LOOKUP_ROOT))
                         nd->root.mnt = NULL;
-               unlock_rcu_walk();
+               rcu_read_unlock();
         }
  }
  
@@ -1511,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd)
   * so we keep a cache of "no, this doesn't need follow_link"
   * for the common case.
   */
-static inline int should_follow_link(struct inode *inode, int follow)
+static inline int should_follow_link(struct dentry *dentry, int follow)
  {
-       if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
-               if (likely(inode->i_op->follow_link))
-                       return follow;
-
-               /* This gets set once for the inode lifetime */
-               spin_lock(&inode->i_lock);
-               inode->i_opflags |= IOP_NOFOLLOW;
-               spin_unlock(&inode->i_lock);
-       }
-       return 0;
+       return unlikely(d_is_symlink(dentry)) ? follow : 0;
  }
  
  static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1552,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
         if (!inode)
                 goto out_path_put;
  
-       if (should_follow_link(inode, follow)) {
+       if (should_follow_link(path->dentry, follow)) {
                 if (nd->flags & LOOKUP_RCU) {
                         if (unlikely(unlazy_walk(nd, path->dentry))) {
                                 err = -ECHILD;
@@ -1610,26 +1591,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
         return res;
  }
  
-/*
- * We really don't want to look at inode->i_op->lookup
- * when we don't have to. So we keep a cache bit in
- * the inode ->i_opflags field that says "yes, we can
- * do lookup on this inode".
- */
-static inline int can_lookup(struct inode *inode)
-{
-       if (likely(inode->i_opflags & IOP_LOOKUP))
-               return 1;
-       if (likely(!inode->i_op->lookup))
-               return 0;
-
-       /* We do this once for the lifetime of the inode */
-       spin_lock(&inode->i_lock);
-       inode->i_opflags |= IOP_LOOKUP;
-       spin_unlock(&inode->i_lock);
-       return 1;
-}
-
  /*
   * We can do the critical dentry name comparison and hashing
   * operations one word at a time, but we are limited to:
@@ -1833,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
                         if (err)
                                 return err;
                 }
-               if (!can_lookup(nd->inode)) {
+               if (!d_is_directory(nd->path.dentry)) {
                         err = -ENOTDIR; 
                         break;
                 }
@@ -1851,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
         nd->flags = flags | LOOKUP_JUMPED;
         nd->depth = 0;
         if (flags & LOOKUP_ROOT) {
-               struct inode *inode = nd->root.dentry->d_inode;
+               struct dentry *root = nd->root.dentry;
+               struct inode *inode = root->d_inode;
                 if (*name) {
-                       if (!can_lookup(inode))
+                       if (!d_is_directory(root))
                                 return -ENOTDIR;
                         retval = inode_permission(inode, MAY_EXEC);
                         if (retval)
@@ -1862,8 +1824,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                 nd->path = nd->root;
                 nd->inode = inode;
                 if (flags & LOOKUP_RCU) {
-                       lock_rcu_walk();
+                       rcu_read_lock();
                         nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+                       nd->m_seq = read_seqbegin(&mount_lock);
                 } else {
                         path_get(&nd->path);
                 }
@@ -1872,9 +1835,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
  
         nd->root.mnt = NULL;
  
+       nd->m_seq = read_seqbegin(&mount_lock);
         if (*name=='/') {
                 if (flags & LOOKUP_RCU) {
-                       lock_rcu_walk();
+                       rcu_read_lock();
                         set_root_rcu(nd);
                 } else {
                         set_root(nd);
@@ -1886,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                         struct fs_struct *fs = current->fs;
                         unsigned seq;
  
-                       lock_rcu_walk();
+                       rcu_read_lock();
  
                         do {
                                 seq = read_seqcount_begin(&fs->seq);
@@ -1907,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                 dentry = f.file->f_path.dentry;
  
                 if (*name) {
-                       if (!can_lookup(dentry->d_inode)) {
+                       if (!d_is_directory(dentry)) {
                                 fdput(f);
                                 return -ENOTDIR;
                         }
@@ -1918,7 +1882,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                         if (f.need_put)
                                 *fp = f.file;
                         nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
-                       lock_rcu_walk();
+                       rcu_read_lock();
                 } else {
                         path_get(&nd->path);
                         fdput(f);
@@ -1989,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name,
                 err = complete_walk(nd);
  
         if (!err && nd->flags & LOOKUP_DIRECTORY) {
-               if (!can_lookup(nd->inode)) {
+               if (!d_is_directory(nd->path.dentry)) {
                         path_put(&nd->path);
                         err = -ENOTDIR;
                 }
@@ -2281,7 +2245,7 @@ done:
         }
         path->dentry = dentry;
         path->mnt = mntget(nd->path.mnt);
-       if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+       if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
                 return 1;
         follow_mount(path);
         error = 0;
@@ -2426,12 +2390,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
   * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
   *     nfs_async_unlink().
   */
-static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
  {
+       struct inode *inode = victim->d_inode;
         int error;
  
-       if (!victim->d_inode)
+       if (d_is_negative(victim))
                 return -ENOENT;
+       BUG_ON(!inode);
  
         BUG_ON(victim->d_parent->d_inode != dir);
         audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2441,15 +2407,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
                 return error;
         if (IS_APPEND(dir))
                 return -EPERM;
-       if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
-           IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+
+       if (check_sticky(dir, inode) || IS_APPEND(inode) ||
+           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
                 return -EPERM;
         if (isdir) {
-               if (!S_ISDIR(victim->d_inode->i_mode))
+               if (!d_is_directory(victim) && !d_is_autodir(victim))
                         return -ENOTDIR;
                 if (IS_ROOT(victim))
                         return -EBUSY;
-       } else if (S_ISDIR(victim->d_inode->i_mode))
+       } else if (d_is_directory(victim) || d_is_autodir(victim))
                 return -EISDIR;
         if (IS_DEADDIR(dir))
                 return -ENOENT;
@@ -2983,7 +2950,7 @@ retry_lookup:
         /*
          * create/update audit record if it already exists.
          */
-       if (path->dentry->d_inode)
+       if (d_is_positive(path->dentry))
                 audit_inode(name, path->dentry, 0);
  
         /*
@@ -3012,12 +2979,12 @@ retry_lookup:
  finish_lookup:
         /* we _can_ be in RCU mode here */
         error = -ENOENT;
-       if (!inode) {
+       if (d_is_negative(path->dentry)) {
                 path_to_nameidata(path, nd);
                 goto out;
         }
  
-       if (should_follow_link(inode, !symlink_ok)) {
+       if (should_follow_link(path->dentry, !symlink_ok)) {
                 if (nd->flags & LOOKUP_RCU) {
                         if (unlikely(unlazy_walk(nd, path->dentry))) {
                                 error = -ECHILD;
@@ -3046,10 +3013,11 @@ finish_open:
         }
         audit_inode(name, nd->path.dentry, 0);
         error = -EISDIR;
-       if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
+       if ((open_flag & O_CREAT) &&
+           (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
                 goto out;
         error = -ENOTDIR;
-       if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
+       if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
                 goto out;
         if (!S_ISREG(nd->inode->i_mode))
                 will_truncate = false;
@@ -3275,7 +3243,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
         nd.root.mnt = mnt;
         nd.root.dentry = dentry;
  
-       if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+       if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
                 return ERR_PTR(-ELOOP);
  
         file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3325,8 +3293,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
                 goto unlock;
  
         error = -EEXIST;
-       if (dentry->d_inode)
+       if (d_is_positive(dentry))
                 goto fail;
+
         /*
          * Special case - lookup gave negative, but... we had foo/bar/
          * From the vfs_mknod() POV we just have a negative dentry -
@@ -3647,8 +3616,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
         return do_rmdir(AT_FDCWD, pathname);
  }
  
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir:       parent directory
+ * @dentry:    victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode.  The caller
+ * should then break the delegation on that inode and retry.  Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
  {
+       struct inode *target = dentry->d_inode;
         int error = may_delete(dir, dentry, 0);
  
         if (error)
@@ -3657,22 +3645,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
         if (!dir->i_op->unlink)
                 return -EPERM;
  
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&target->i_mutex);
         if (d_mountpoint(dentry))
                 error = -EBUSY;
         else {
                 error = security_inode_unlink(dir, dentry);
                 if (!error) {
+                       error = try_break_deleg(target, delegated_inode);
+                       if (error)
+                               goto out;
                         error = dir->i_op->unlink(dir, dentry);
                         if (!error)
                                 dont_mount(dentry);
                 }
         }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+out:
+       mutex_unlock(&target->i_mutex);
  
         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
-               fsnotify_link_count(dentry->d_inode);
+               fsnotify_link_count(target);
                 d_delete(dentry);
         }
  
@@ -3692,6 +3684,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
         struct dentry *dentry;
         struct nameidata nd;
         struct inode *inode = NULL;
+       struct inode *delegated_inode = NULL;
         unsigned int lookup_flags = 0;
  retry:
         name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3706,7 +3699,7 @@ retry:
         error = mnt_want_write(nd.path.mnt);
         if (error)
                 goto exit1;
-
+retry_deleg:
         mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
         dentry = lookup_hash(&nd);
         error = PTR_ERR(dentry);
@@ -3715,19 +3708,25 @@ retry:
                 if (nd.last.name[nd.last.len])
                         goto slashes;
                 inode = dentry->d_inode;
-               if (!inode)
+               if (d_is_negative(dentry))
                         goto slashes;
                 ihold(inode);
                 error = security_path_unlink(&nd.path, dentry);
                 if (error)
                         goto exit2;
-               error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+               error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
  exit2:
                 dput(dentry);
         }
         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
         if (inode)
                 iput(inode);    /* truncate the inode here */
+       inode = NULL;
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         mnt_drop_write(nd.path.mnt);
  exit1:
         path_put(&nd.path);
@@ -3740,8 +3739,12 @@ exit1:
         return error;
  
  slashes:
-       error = !dentry->d_inode ? -ENOENT :
-               S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+       if (d_is_negative(dentry))
+               error = -ENOENT;
+       else if (d_is_directory(dentry) || d_is_autodir(dentry))
+               error = -EISDIR;
+       else
+               error = -ENOTDIR;
         goto exit2;
  }
  
@@ -3817,7 +3820,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
         return sys_symlinkat(oldname, AT_FDCWD, newname);
  }
  
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+/**
+ * vfs_link - create a new link
+ * @old_dentry:        object to be linked
+ * @dir:       new parent
+ * @new_dentry:        where to create the new link
+ * @delegated_inode: returns inode needing a delegation break
+ *
+ * The caller must hold dir->i_mutex
+ *
+ * If vfs_link discovers a delegation on the to-be-linked file in need
+ * of breaking, it will return -EWOULDBLOCK and return a reference to the
+ * inode in delegated_inode.  The caller should then break the delegation
+ * and retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
  {
         struct inode *inode = old_dentry->d_inode;
         unsigned max_links = dir->i_sb->s_max_links;
@@ -3853,8 +3875,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                 error =  -ENOENT;
         else if (max_links && inode->i_nlink >= max_links)
                 error = -EMLINK;
-       else
-               error = dir->i_op->link(old_dentry, dir, new_dentry);
+       else {
+               error = try_break_deleg(inode, delegated_inode);
+               if (!error)
+                       error = dir->i_op->link(old_dentry, dir, new_dentry);
+       }
  
         if (!error && (inode->i_state & I_LINKABLE)) {
                 spin_lock(&inode->i_lock);
@@ -3881,6 +3906,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
  {
         struct dentry *new_dentry;
         struct path old_path, new_path;
+       struct inode *delegated_inode = NULL;
         int how = 0;
         int error;
  
@@ -3919,9 +3945,14 @@ retry:
         error = security_path_link(old_path.dentry, &new_path, new_dentry);
         if (error)
                 goto out_dput;
-       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
+       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
  out_dput:
         done_path_create(&new_path, new_dentry);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry;
+       }
         if (retry_estale(error, how)) {
                 how |= LOOKUP_REVAL;
                 goto retry;
@@ -3946,7 +3977,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
   *        That's where 4.4 screws up. Current fix: serialization on
   *        sb->s_vfs_rename_mutex. We might be more accurate, but that's another
   *        story.
- *     c) we have to lock _three_ objects - parents and victim (if it exists).
+ *     c) we have to lock _four_ objects - parents and victim (if it exists),
+ *        and source (if it is not a directory).
   *        And that - after we got ->i_mutex on parents (until then we don't know
   *        whether the target exists).  Solution: try to be smart with locking
   *        order for inodes.  We rely on the fact that tree topology may change
@@ -4019,9 +4051,11 @@ out:
  }
  
  static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-                           struct inode *new_dir, struct dentry *new_dentry)
+                           struct inode *new_dir, struct dentry *new_dentry,
+                           struct inode **delegated_inode)
  {
         struct inode *target = new_dentry->d_inode;
+       struct inode *source = old_dentry->d_inode;
         int error;
  
         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4029,13 +4063,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
                 return error;
  
         dget(new_dentry);
-       if (target)
-               mutex_lock(&target->i_mutex);
+       lock_two_nondirectories(source, target);
  
         error = -EBUSY;
         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
                 goto out;
  
+       error = try_break_deleg(source, delegated_inode);
+       if (error)
+               goto out;
+       if (target) {
+               error = try_break_deleg(target, delegated_inode);
+               if (error)
+                       goto out;
+       }
         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
         if (error)
                 goto out;
@@ -4045,17 +4086,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
         if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                 d_move(old_dentry, new_dentry);
  out:
-       if (target)
-               mutex_unlock(&target->i_mutex);
+       unlock_two_nondirectories(source, target);
         dput(new_dentry);
         return error;
  }
  
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir:   parent of source
+ * @old_dentry:        source
+ * @new_dir:   parent of destination
+ * @new_dentry:        destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename().
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode.  The caller should then
+ * break the delegation and retry.  Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
  int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-              struct inode *new_dir, struct dentry *new_dentry)
+              struct inode *new_dir, struct dentry *new_dentry,
+              struct inode **delegated_inode)
  {
         int error;
-       int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+       int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
         const unsigned char *old_name;
  
         if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4080,7 +4142,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (is_dir)
                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
         else
-               error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+               error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
         if (!error)
                 fsnotify_move(old_dir, new_dir, old_name, is_dir,
                               new_dentry->d_inode, old_dentry);
@@ -4096,6 +4158,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
         struct dentry *old_dentry, *new_dentry;
         struct dentry *trap;
         struct nameidata oldnd, newnd;
+       struct inode *delegated_inode = NULL;
         struct filename *from;
         struct filename *to;
         unsigned int lookup_flags = 0;
@@ -4135,6 +4198,7 @@ retry:
         newnd.flags &= ~LOOKUP_PARENT;
         newnd.flags |= LOOKUP_RENAME_TARGET;
  
+retry_deleg:
         trap = lock_rename(new_dir, old_dir);
  
         old_dentry = lookup_hash(&oldnd);
@@ -4143,10 +4207,10 @@ retry:
                 goto exit3;
         /* source must exist */
         error = -ENOENT;
-       if (!old_dentry->d_inode)
+       if (d_is_negative(old_dentry))
                 goto exit4;
         /* unless the source is a directory trailing slashes give -ENOTDIR */
-       if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+       if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
                 error = -ENOTDIR;
                 if (oldnd.last.name[oldnd.last.len])
                         goto exit4;
@@ -4171,13 +4235,19 @@ retry:
         if (error)
                 goto exit5;
         error = vfs_rename(old_dir->d_inode, old_dentry,
-                                  new_dir->d_inode, new_dentry);
+                                  new_dir->d_inode, new_dentry,
+                                  &delegated_inode);
  exit5:
         dput(new_dentry);
  exit4:
         dput(old_dentry);
  exit3:
         unlock_rename(new_dir, old_dir);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         mnt_drop_write(oldnd.path.mnt);
  exit2:
         if (retry_estale(error, lookup_flags))
diff --git a/fs/namespace.c b/fs/namespace.c

index da5c494..ac2ce8a 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -39,7 +39,7 @@ static int mnt_group_start = 1;
  static struct list_head *mount_hashtable __read_mostly;
  static struct list_head *mountpoint_hashtable __read_mostly;
  static struct kmem_cache *mnt_cache __read_mostly;
-static struct rw_semaphore namespace_sem;
+static DECLARE_RWSEM(namespace_sem);
  
  /* /sys/fs */
  struct kobject *fs_kobj;
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
   * It should be taken for write in all cases where the vfsmount
   * tree or hash is modified or when a vfsmount structure is modified.
   */
-DEFINE_BRLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
  
  static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
  {
@@ -63,8 +63,6 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
         return tmp & (HASH_SIZE - 1);
  }
  
-#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
-
  /*
   * allocation is serialized by namespace_sem, but we need the spinlock to
   * serialize with freeing.
@@ -458,7 +456,7 @@ static int mnt_make_readonly(struct mount *mnt)
  {
         int ret = 0;
  
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
         /*
          * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -492,15 +490,15 @@ static int mnt_make_readonly(struct mount *mnt)
          */
         smp_wmb();
         mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         return ret;
  }
  
  static void __mnt_unmake_readonly(struct mount *mnt)
  {
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         mnt->mnt.mnt_flags &= ~MNT_READONLY;
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  }
  
  int sb_prepare_remount_readonly(struct super_block *sb)
@@ -512,7 +510,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
         if (atomic_long_read(&sb->s_remove_count))
                 return -EBUSY;
  
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
                 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
                         mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
@@ -534,7 +532,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
                 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
                         mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
         }
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  
         return err;
  }
@@ -549,30 +547,56 @@ static void free_vfsmnt(struct mount *mnt)
         kmem_cache_free(mnt_cache, mnt);
  }
  
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+       struct mount *mnt;
+       if (read_seqretry(&mount_lock, seq))
+               return false;
+       if (bastard == NULL)
+               return true;
+       mnt = real_mount(bastard);
+       mnt_add_count(mnt, 1);
+       if (likely(!read_seqretry(&mount_lock, seq)))
+               return true;
+       if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
+               mnt_add_count(mnt, -1);
+               return false;
+       }
+       rcu_read_unlock();
+       mntput(bastard);
+       rcu_read_lock();
+       return false;
+}
+
  /*
- * find the first or last mount at @dentry on vfsmount @mnt depending on
- * @dir. If @dir is set return the first mount else return the last mount.
- * vfsmount_lock must be held for read or write.
+ * find the first mount at @dentry on vfsmount @mnt.
+ * call under rcu_read_lock()
   */
-struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
-                             int dir)
+struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
  {
         struct list_head *head = mount_hashtable + hash(mnt, dentry);
-       struct list_head *tmp = head;
-       struct mount *p, *found = NULL;
+       struct mount *p;
  
-       for (;;) {
-               tmp = dir ? tmp->next : tmp->prev;
-               p = NULL;
-               if (tmp == head)
-                       break;
-               p = list_entry(tmp, struct mount, mnt_hash);
-               if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
-                       found = p;
-                       break;
-               }
-       }
-       return found;
+       list_for_each_entry_rcu(p, head, mnt_hash)
+               if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+                       return p;
+       return NULL;
+}
+
+/*
+ * find the last mount at @dentry on vfsmount @mnt.
+ * mount_lock must be held.
+ */
+struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+{
+       struct list_head *head = mount_hashtable + hash(mnt, dentry);
+       struct mount *p;
+
+       list_for_each_entry_reverse(p, head, mnt_hash)
+               if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+                       return p;
+       return NULL;
  }
  
  /*
@@ -594,17 +618,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
  struct vfsmount *lookup_mnt(struct path *path)
  {
         struct mount *child_mnt;
+       struct vfsmount *m;
+       unsigned seq;
  
-       br_read_lock(&vfsmount_lock);
-       child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
-       if (child_mnt) {
-               mnt_add_count(child_mnt, 1);
-               br_read_unlock(&vfsmount_lock);
-               return &child_mnt->mnt;
-       } else {
-               br_read_unlock(&vfsmount_lock);
-               return NULL;
-       }
+       rcu_read_lock();
+       do {
+               seq = read_seqbegin(&mount_lock);
+               child_mnt = __lookup_mnt(path->mnt, path->dentry);
+               m = child_mnt ? &child_mnt->mnt : NULL;
+       } while (!legitimize_mnt(m, seq));
+       rcu_read_unlock();
+       return m;
  }
  
  static struct mountpoint *new_mountpoint(struct dentry *dentry)
@@ -796,9 +820,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
         mnt->mnt.mnt_sb = root->d_sb;
         mnt->mnt_mountpoint = mnt->mnt.mnt_root;
         mnt->mnt_parent = mnt;
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         return &mnt->mnt;
  }
  EXPORT_SYMBOL_GPL(vfs_kern_mount);
@@ -839,9 +863,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
         mnt->mnt.mnt_root = dget(root);
         mnt->mnt_mountpoint = mnt->mnt.mnt_root;
         mnt->mnt_parent = mnt;
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  
         if ((flag & CL_SLAVE) ||
             ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
@@ -872,64 +896,66 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
         return ERR_PTR(err);
  }
  
-static inline void mntfree(struct mount *mnt)
+static void delayed_free(struct rcu_head *head)
  {
-       struct vfsmount *m = &mnt->mnt;
-       struct super_block *sb = m->mnt_sb;
-
-       /*
-        * This probably indicates that somebody messed
-        * up a mnt_want/drop_write() pair.  If this
-        * happens, the filesystem was probably unable
-        * to make r/w->r/o transitions.
-        */
-       /*
-        * The locking used to deal with mnt_count decrement provides barriers,
-        * so mnt_get_writers() below is safe.
-        */
-       WARN_ON(mnt_get_writers(mnt));
-       fsnotify_vfsmount_delete(m);
-       dput(m->mnt_root);
-       free_vfsmnt(mnt);
-       deactivate_super(sb);
+       struct mount *mnt = container_of(head, struct mount, mnt_rcu);
+       kfree(mnt->mnt_devname);
+#ifdef CONFIG_SMP
+       free_percpu(mnt->mnt_pcp);
+#endif
+       kmem_cache_free(mnt_cache, mnt);
  }
  
  static void mntput_no_expire(struct mount *mnt)
  {
  put_again:
-#ifdef CONFIG_SMP
-       br_read_lock(&vfsmount_lock);
-       if (likely(mnt->mnt_ns)) {
-               /* shouldn't be the last one */
-               mnt_add_count(mnt, -1);
-               br_read_unlock(&vfsmount_lock);
+       rcu_read_lock();
+       mnt_add_count(mnt, -1);
+       if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+               rcu_read_unlock();
                 return;
         }
-       br_read_unlock(&vfsmount_lock);
-
-       br_write_lock(&vfsmount_lock);
-       mnt_add_count(mnt, -1);
+       lock_mount_hash();
         if (mnt_get_count(mnt)) {
-               br_write_unlock(&vfsmount_lock);
+               rcu_read_unlock();
+               unlock_mount_hash();
                 return;
         }
-#else
-       mnt_add_count(mnt, -1);
-       if (likely(mnt_get_count(mnt)))
-               return;
-       br_write_lock(&vfsmount_lock);
-#endif
         if (unlikely(mnt->mnt_pinned)) {
                 mnt_add_count(mnt, mnt->mnt_pinned + 1);
                 mnt->mnt_pinned = 0;
-               br_write_unlock(&vfsmount_lock);
+               rcu_read_unlock();
+               unlock_mount_hash();
                 acct_auto_close_mnt(&mnt->mnt);
                 goto put_again;
         }
+       if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
+               rcu_read_unlock();
+               unlock_mount_hash();
+               return;
+       }
+       mnt->mnt.mnt_flags |= MNT_DOOMED;
+       rcu_read_unlock();
  
         list_del(&mnt->mnt_instance);
-       br_write_unlock(&vfsmount_lock);
-       mntfree(mnt);
+       unlock_mount_hash();
+
+       /*
+        * This probably indicates that somebody messed
+        * up a mnt_want/drop_write() pair.  If this
+        * happens, the filesystem was probably unable
+        * to make r/w->r/o transitions.
+        */
+       /*
+        * The locking used to deal with mnt_count decrement provides barriers,
+        * so mnt_get_writers() below is safe.
+        */
+       WARN_ON(mnt_get_writers(mnt));
+       fsnotify_vfsmount_delete(&mnt->mnt);
+       dput(mnt->mnt.mnt_root);
+       deactivate_super(mnt->mnt.mnt_sb);
+       mnt_free_id(mnt);
+       call_rcu(&mnt->mnt_rcu, delayed_free);
  }
  
  void mntput(struct vfsmount *mnt)
@@ -954,21 +980,21 @@ EXPORT_SYMBOL(mntget);
  
  void mnt_pin(struct vfsmount *mnt)
  {
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         real_mount(mnt)->mnt_pinned++;
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  }
  EXPORT_SYMBOL(mnt_pin);
  
  void mnt_unpin(struct vfsmount *m)
  {
         struct mount *mnt = real_mount(m);
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         if (mnt->mnt_pinned) {
                 mnt_add_count(mnt, 1);
                 mnt->mnt_pinned--;
         }
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  }
  EXPORT_SYMBOL(mnt_unpin);
  
@@ -1085,12 +1111,12 @@ int may_umount_tree(struct vfsmount *m)
         BUG_ON(!m);
  
         /* write lock needed for mnt_get_count */
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         for (p = mnt; p; p = next_mnt(p, mnt)) {
                 actual_refs += mnt_get_count(p);
                 minimum_refs += 2;
         }
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  
         if (actual_refs > minimum_refs)
                 return 0;
@@ -1117,10 +1143,10 @@ int may_umount(struct vfsmount *mnt)
  {
         int ret = 1;
         down_read(&namespace_sem);
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         if (propagate_mount_busy(real_mount(mnt), 2))
                 ret = 0;
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         up_read(&namespace_sem);
         return ret;
  }
@@ -1142,23 +1168,13 @@ static void namespace_unlock(void)
         list_splice_init(&unmounted, &head);
         up_write(&namespace_sem);
  
+       synchronize_rcu();
+
         while (!list_empty(&head)) {
                 mnt = list_first_entry(&head, struct mount, mnt_hash);
                 list_del_init(&mnt->mnt_hash);
-               if (mnt_has_parent(mnt)) {
-                       struct dentry *dentry;
-                       struct mount *m;
-
-                       br_write_lock(&vfsmount_lock);
-                       dentry = mnt->mnt_mountpoint;
-                       m = mnt->mnt_parent;
-                       mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-                       mnt->mnt_parent = mnt;
-                       m->mnt_ghosts--;
-                       br_write_unlock(&vfsmount_lock);
-                       dput(dentry);
-                       mntput(&m->mnt);
-               }
+               if (mnt->mnt_ex_mountpoint.mnt)
+                       path_put(&mnt->mnt_ex_mountpoint);
                 mntput(&mnt->mnt);
         }
  }
@@ -1169,10 +1185,13 @@ static inline void namespace_lock(void)
  }
  
  /*
- * vfsmount lock must be held for write
+ * mount_lock must be held
   * namespace_sem must be held for write
+ * how = 0 => just this tree, don't propagate
+ * how = 1 => propagate; we know that nobody else has reference to any victims
+ * how = 2 => lazy umount
   */
-void umount_tree(struct mount *mnt, int propagate)
+void umount_tree(struct mount *mnt, int how)
  {
         LIST_HEAD(tmp_list);
         struct mount *p;
@@ -1180,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate)
         for (p = mnt; p; p = next_mnt(p, mnt))
                 list_move(&p->mnt_hash, &tmp_list);
  
-       if (propagate)
+       if (how)
                 propagate_umount(&tmp_list);
  
         list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1188,10 +1207,16 @@ void umount_tree(struct mount *mnt, int propagate)
                 list_del_init(&p->mnt_list);
                 __touch_mnt_namespace(p->mnt_ns);
                 p->mnt_ns = NULL;
+               if (how < 2)
+                       p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
                 list_del_init(&p->mnt_child);
                 if (mnt_has_parent(p)) {
-                       p->mnt_parent->mnt_ghosts++;
                         put_mountpoint(p->mnt_mp);
+                       /* move the reference to mountpoint into ->mnt_ex_mountpoint */
+                       p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
+                       p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
+                       p->mnt_mountpoint = p->mnt.mnt_root;
+                       p->mnt_parent = p;
                         p->mnt_mp = NULL;
                 }
                 change_mnt_propagation(p, MS_PRIVATE);
@@ -1225,12 +1250,12 @@ static int do_umount(struct mount *mnt, int flags)
                  * probably don't strictly need the lock here if we examined
                  * all race cases, but it's a slowpath.
                  */
-               br_write_lock(&vfsmount_lock);
+               lock_mount_hash();
                 if (mnt_get_count(mnt) != 2) {
-                       br_write_unlock(&vfsmount_lock);
+                       unlock_mount_hash();
                         return -EBUSY;
                 }
-               br_write_unlock(&vfsmount_lock);
+               unlock_mount_hash();
  
                 if (!xchg(&mnt->mnt_expiry_mark, 1))
                         return -EAGAIN;
@@ -1272,19 +1297,23 @@ static int do_umount(struct mount *mnt, int flags)
         }
  
         namespace_lock();
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         event++;
  
-       if (!(flags & MNT_DETACH))
-               shrink_submounts(mnt);
-
-       retval = -EBUSY;
-       if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
+       if (flags & MNT_DETACH) {
                 if (!list_empty(&mnt->mnt_list))
-                       umount_tree(mnt, 1);
+                       umount_tree(mnt, 2);
                 retval = 0;
+       } else {
+               shrink_submounts(mnt);
+               retval = -EBUSY;
+               if (!propagate_mount_busy(mnt, 2)) {
+                       if (!list_empty(&mnt->mnt_list))
+                               umount_tree(mnt, 1);
+                       retval = 0;
+               }
         }
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         namespace_unlock();
         return retval;
  }
@@ -1427,18 +1456,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                         q = clone_mnt(p, p->mnt.mnt_root, flag);
                         if (IS_ERR(q))
                                 goto out;
-                       br_write_lock(&vfsmount_lock);
+                       lock_mount_hash();
                         list_add_tail(&q->mnt_list, &res->mnt_list);
                         attach_mnt(q, parent, p->mnt_mp);
-                       br_write_unlock(&vfsmount_lock);
+                       unlock_mount_hash();
                 }
         }
         return res;
  out:
         if (res) {
-               br_write_lock(&vfsmount_lock);
+               lock_mount_hash();
                 umount_tree(res, 0);
-               br_write_unlock(&vfsmount_lock);
+               unlock_mount_hash();
         }
         return q;
  }
@@ -1460,9 +1489,9 @@ struct vfsmount *collect_mounts(struct path *path)
  void drop_collected_mounts(struct vfsmount *mnt)
  {
         namespace_lock();
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         umount_tree(real_mount(mnt), 0);
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         namespace_unlock();
  }
  
@@ -1589,7 +1618,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
         if (err)
                 goto out_cleanup_ids;
  
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
  
         if (IS_MNT_SHARED(dest_mnt)) {
                 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1608,7 +1637,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                 list_del_init(&child->mnt_hash);
                 commit_tree(child);
         }
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  
         return 0;
  
@@ -1710,10 +1739,10 @@ static int do_change_type(struct path *path, int flag)
                         goto out_unlock;
         }
  
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
                 change_mnt_propagation(m, type);
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
  
   out_unlock:
         namespace_unlock();
@@ -1785,9 +1814,9 @@ static int do_loopback(struct path *path, const char *old_name,
  
         err = graft_tree(mnt, parent, mp);
         if (err) {
-               br_write_lock(&vfsmount_lock);
+               lock_mount_hash();
                 umount_tree(mnt, 0);
-               br_write_unlock(&vfsmount_lock);
+               unlock_mount_hash();
         }
  out2:
         unlock_mount(mp);
@@ -1846,17 +1875,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
         else
                 err = do_remount_sb(sb, flags, data, 0);
         if (!err) {
-               br_write_lock(&vfsmount_lock);
+               lock_mount_hash();
                 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
                 mnt->mnt.mnt_flags = mnt_flags;
-               br_write_unlock(&vfsmount_lock);
-       }
-       up_write(&sb->s_umount);
-       if (!err) {
-               br_write_lock(&vfsmount_lock);
                 touch_mnt_namespace(mnt->mnt_ns);
-               br_write_unlock(&vfsmount_lock);
+               unlock_mount_hash();
         }
+       up_write(&sb->s_umount);
         return err;
  }
  
@@ -1972,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
         struct mount *parent;
         int err;
  
-       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
+       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
  
         mp = lock_mount(path);
         if (IS_ERR(mp))
@@ -2077,9 +2102,7 @@ fail:
         /* remove m from any expiration list it may be on */
         if (!list_empty(&mnt->mnt_expire)) {
                 namespace_lock();
-               br_write_lock(&vfsmount_lock);
                 list_del_init(&mnt->mnt_expire);
-               br_write_unlock(&vfsmount_lock);
                 namespace_unlock();
         }
         mntput(m);
@@ -2095,11 +2118,9 @@ fail:
  void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
  {
         namespace_lock();
-       br_write_lock(&vfsmount_lock);
  
         list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
  
-       br_write_unlock(&vfsmount_lock);
         namespace_unlock();
  }
  EXPORT_SYMBOL(mnt_set_expiry);
@@ -2118,7 +2139,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
                 return;
  
         namespace_lock();
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
  
         /* extract from the expiration list every vfsmount that matches the
          * following criteria:
@@ -2137,7 +2158,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
                 touch_mnt_namespace(mnt->mnt_ns);
                 umount_tree(mnt, 1);
         }
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         namespace_unlock();
  }
  
@@ -2193,7 +2214,7 @@ resume:
   * process a list of expirable mountpoints with the intent of discarding any
   * submounts of a specific parent mountpoint
   *
- * vfsmount_lock must be held for write
+ * mount_lock must be held for write
   */
  static void shrink_submounts(struct mount *mnt)
  {
@@ -2414,20 +2435,25 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
         return new_ns;
  }
  
-/*
- * Allocate a new namespace structure and populate it with contents
- * copied from the namespace of the passed in task structure.
- */
-static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
-               struct user_namespace *user_ns, struct fs_struct *fs)
+struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+               struct user_namespace *user_ns, struct fs_struct *new_fs)
  {
         struct mnt_namespace *new_ns;
         struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
         struct mount *p, *q;
-       struct mount *old = mnt_ns->root;
+       struct mount *old;
         struct mount *new;
         int copy_flags;
  
+       BUG_ON(!ns);
+
+       if (likely(!(flags & CLONE_NEWNS))) {
+               get_mnt_ns(ns);
+               return ns;
+       }
+
+       old = ns->root;
+
         new_ns = alloc_mnt_ns(user_ns);
         if (IS_ERR(new_ns))
                 return new_ns;
@@ -2435,7 +2461,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
         namespace_lock();
         /* First pass: copy the tree topology */
         copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
-       if (user_ns != mnt_ns->user_ns)
+       if (user_ns != ns->user_ns)
                 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
         new = copy_tree(old, old->mnt.mnt_root, copy_flags);
         if (IS_ERR(new)) {
@@ -2444,9 +2470,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
                 return ERR_CAST(new);
         }
         new_ns->root = new;
-       br_write_lock(&vfsmount_lock);
         list_add_tail(&new_ns->list, &new->mnt_list);
-       br_write_unlock(&vfsmount_lock);
  
         /*
          * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2457,13 +2481,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
         q = new;
         while (p) {
                 q->mnt_ns = new_ns;
-               if (fs) {
-                       if (&p->mnt == fs->root.mnt) {
-                               fs->root.mnt = mntget(&q->mnt);
+               if (new_fs) {
+                       if (&p->mnt == new_fs->root.mnt) {
+                               new_fs->root.mnt = mntget(&q->mnt);
                                 rootmnt = &p->mnt;
                         }
-                       if (&p->mnt == fs->pwd.mnt) {
-                               fs->pwd.mnt = mntget(&q->mnt);
+                       if (&p->mnt == new_fs->pwd.mnt) {
+                               new_fs->pwd.mnt = mntget(&q->mnt);
                                 pwdmnt = &p->mnt;
                         }
                 }
@@ -2484,23 +2508,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
         return new_ns;
  }
  
-struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
-               struct user_namespace *user_ns, struct fs_struct *new_fs)
-{
-       struct mnt_namespace *new_ns;
-
-       BUG_ON(!ns);
-       get_mnt_ns(ns);
-
-       if (!(flags & CLONE_NEWNS))
-               return ns;
-
-       new_ns = dup_mnt_ns(ns, user_ns, new_fs);
-
-       put_mnt_ns(ns);
-       return new_ns;
-}
-
  /**
   * create_mnt_ns - creates a private namespace and adds a root filesystem
   * @mnt: pointer to the new root filesystem mountpoint
@@ -2593,7 +2600,7 @@ out_type:
  /*
   * Return true if path is reachable from root
   *
- * namespace_sem or vfsmount_lock is held
+ * namespace_sem or mount_lock is held
   */
  bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
                          const struct path *root)
@@ -2608,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
  int path_is_under(struct path *path1, struct path *path2)
  {
         int res;
-       br_read_lock(&vfsmount_lock);
+       read_seqlock_excl(&mount_lock);
         res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
-       br_read_unlock(&vfsmount_lock);
+       read_sequnlock_excl(&mount_lock);
         return res;
  }
  EXPORT_SYMBOL(path_is_under);
@@ -2701,7 +2708,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
         if (!is_path_reachable(old_mnt, old.dentry, &new))
                 goto out4;
         root_mp->m_count++; /* pin it so it won't go away */
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         detach_mnt(new_mnt, &parent_path);
         detach_mnt(root_mnt, &root_parent);
         if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
@@ -2713,7 +2720,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
         /* mount new_root on / */
         attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
         touch_mnt_namespace(current->nsproxy->mnt_ns);
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         chroot_fs_refs(&root, &new);
         put_mountpoint(root_mp);
         error = 0;
@@ -2767,8 +2774,6 @@ void __init mnt_init(void)
         unsigned u;
         int err;
  
-       init_rwsem(&namespace_sem);
-
         mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
                         0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  
@@ -2785,8 +2790,6 @@ void __init mnt_init(void)
         for (u = 0; u < HASH_SIZE; u++)
                 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
  
-       br_lock_init(&vfsmount_lock);
-
         err = sysfs_init();
         if (err)
                 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2802,11 +2805,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
  {
         if (!atomic_dec_and_test(&ns->count))
                 return;
-       namespace_lock();
-       br_write_lock(&vfsmount_lock);
-       umount_tree(ns->root, 0);
-       br_write_unlock(&vfsmount_lock);
-       namespace_unlock();
+       drop_collected_mounts(&ns->root->mnt);
         free_mnt_ns(ns);
  }
  
@@ -2829,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt)
  {
         /* release long term mount so mount point can be released */
         if (!IS_ERR_OR_NULL(mnt)) {
-               br_write_lock(&vfsmount_lock);
                 real_mount(mnt)->mnt_ns = NULL;
-               br_write_unlock(&vfsmount_lock);
+               synchronize_rcu();      /* yecchhh... */
                 mntput(mnt);
         }
  }
@@ -2875,7 +2873,7 @@ bool fs_fully_visible(struct file_system_type *type)
         if (unlikely(!ns))
                 return false;
  
-       namespace_lock();
+       down_read(&namespace_sem);
         list_for_each_entry(mnt, &ns->list, mnt_list) {
                 struct mount *child;
                 if (mnt->mnt.mnt_sb->s_type != type)
@@ -2896,7 +2894,7 @@ bool fs_fully_visible(struct file_system_type *type)
         next:   ;
         }
  found:
-       namespace_unlock();
+       up_read(&namespace_sem);
         return visible;
  }
  
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c

index 3be0474..c320ac5 100644 (file)
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -339,9 +339,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
         if (val)
                 goto finished;
  
-       DDPRINTK("ncp_lookup_validate: %s/%s not valid, age=%ld, server lookup\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
-               NCP_GET_AGE(dentry));
+       DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n",
+               dentry, NCP_GET_AGE(dentry));
  
         len = sizeof(__name);
         if (ncp_is_server_root(dir)) {
@@ -359,8 +358,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                         res = ncp_obtain_info(server, dir, __name, &(finfo.i));
         }
         finfo.volume = finfo.i.volNumber;
-       DDPRINTK("ncp_lookup_validate: looked for %s/%s, res=%d\n",
-               dentry->d_parent->d_name.name, __name, res);
+       DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n",
+               dentry->d_parent, __name, res);
         /*
          * If we didn't find it, or if it has a different dirEntNum to
          * what we remember, it's not valid any more.
@@ -454,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
         ctl.page  = NULL;
         ctl.cache = NULL;
  
-       DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
+       DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file,
                 (int) ctx->pos);
  
         result = -EIO;
@@ -740,12 +738,10 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx,
         int more;
         size_t bufsize;
  
-       DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
+       DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file,
                 (unsigned long) ctx->pos);
-       PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n",
-               dentry->d_name.name, NCP_FINFO(dir)->volNumber,
-               NCP_FINFO(dir)->dirEntNum);
+       PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n",
+               file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
  
         err = ncp_initialize_search(server, dir, &seq);
         if (err) {
@@ -850,8 +846,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
         if (!ncp_conn_valid(server))
                 goto finished;
  
-       PPRINTK("ncp_lookup: server lookup for %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry);
  
         len = sizeof(__name);
         if (ncp_is_server_root(dir)) {
@@ -867,8 +862,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
                 if (!res)
                         res = ncp_obtain_info(server, dir, __name, &(finfo.i));
         }
-       PPRINTK("ncp_lookup: looked for %s/%s, res=%d\n",
-               dentry->d_parent->d_name.name, __name, res);
+       PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res);
         /*
          * If we didn't find an entry, make a negative dentry.
          */
@@ -915,8 +909,7 @@ out:
         return error;
  
  out_close:
-       PPRINTK("ncp_instantiate: %s/%s failed, closing file\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry);
         ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
         goto out;
  }
@@ -930,8 +923,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
         int opmode;
         __u8 __name[NCP_MAXPATHLEN + 1];
         
-       PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name, mode);
+       PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode);
  
         ncp_age_dentry(server, dentry);
         len = sizeof(__name);
@@ -960,8 +952,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
                                 error = -ENAMETOOLONG;
                         else if (result < 0)
                                 error = result;
-                       DPRINTK("ncp_create: %s/%s failed\n",
-                               dentry->d_parent->d_name.name, dentry->d_name.name);
+                       DPRINTK("ncp_create: %pd2 failed\n", dentry);
                         goto out;
                 }
                 opmode = O_WRONLY;
@@ -994,8 +985,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         int error, len;
         __u8 __name[NCP_MAXPATHLEN + 1];
  
-       DPRINTK("ncp_mkdir: making %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       DPRINTK("ncp_mkdir: making %pd2\n", dentry);
  
         ncp_age_dentry(server, dentry);
         len = sizeof(__name);
@@ -1032,8 +1022,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
         int error, result, len;
         __u8 __name[NCP_MAXPATHLEN + 1];
  
-       DPRINTK("ncp_rmdir: removing %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       DPRINTK("ncp_rmdir: removing %pd2\n", dentry);
  
         len = sizeof(__name);
         error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
@@ -1078,8 +1067,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
         int error;
  
         server = NCP_SERVER(dir);
-       DPRINTK("ncp_unlink: unlinking %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       DPRINTK("ncp_unlink: unlinking %pd2\n", dentry);
         
         /*
          * Check whether to close the file ...
@@ -1099,8 +1087,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
  #endif
         switch (error) {
                 case 0x00:
-                       DPRINTK("ncp: removed %s/%s\n",
-                               dentry->d_parent->d_name.name, dentry->d_name.name);
+                       DPRINTK("ncp: removed %pd2\n", dentry);
                         break;
                 case 0x85:
                 case 0x8A:
@@ -1133,9 +1120,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
         int old_len, new_len;
         __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];
  
-       DPRINTK("ncp_rename: %s/%s to %s/%s\n",
-               old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
-               new_dentry->d_parent->d_name.name, new_dentry->d_name.name);
+       DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry);
  
         ncp_age_dentry(server, old_dentry);
         ncp_age_dentry(server, new_dentry);
@@ -1165,8 +1150,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
  #endif
         switch (error) {
                 case 0x00:
-                               DPRINTK("ncp renamed %s -> %s.\n",
-                                old_dentry->d_name.name,new_dentry->d_name.name);
+                               DPRINTK("ncp renamed %pd -> %pd.\n",
+                                old_dentry, new_dentry);
                         break;
                 case 0x9E:
                         error = -ENAMETOOLONG;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c

index 122e260..8f5074e 100644 (file)
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -107,8 +107,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
         void* freepage;
         size_t freelen;
  
-       DPRINTK("ncp_file_read: enter %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       DPRINTK("ncp_file_read: enter %pd2\n", dentry);
  
         pos = *ppos;
  
@@ -166,8 +165,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
  
         file_accessed(file);
  
-       DPRINTK("ncp_file_read: exit %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       DPRINTK("ncp_file_read: exit %pd2\n", dentry);
  outrel:
         ncp_inode_close(inode);         
         return already_read ? already_read : error;
@@ -184,8 +182,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
         int errno;
         void* bouncebuffer;
  
-       DPRINTK("ncp_file_write: enter %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       DPRINTK("ncp_file_write: enter %pd2\n", dentry);
         if ((ssize_t) count < 0)
                 return -EINVAL;
         pos = *ppos;
@@ -264,8 +261,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
                         i_size_write(inode, pos);
                 mutex_unlock(&inode->i_mutex);
         }
-       DPRINTK("ncp_file_write: exit %s/%s\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       DPRINTK("ncp_file_write: exit %pd2\n", dentry);
  outrel:
         ncp_inode_close(inode);         
         return already_written ? already_written : errno;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c

index 4659da6..2cf2ebe 100644 (file)
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -782,6 +782,17 @@ out:
         return error;
  }
  
+static void delayed_free(struct rcu_head *p) /* RCU callback queued by ncp_put_super() via call_rcu() */
+{
+       struct ncp_server *server = container_of(p, struct ncp_server, rcu); /* recover the server that embeds p */
+#ifdef CONFIG_NCPFS_NLS
+       /* unload the NLS charsets (previously done directly in ncp_put_super()) */
+       unload_nls(server->nls_vol);
+       unload_nls(server->nls_io);
+#endif /* CONFIG_NCPFS_NLS */
+       kfree(server); /* final release of the ncp_server itself */
+}
+
  static void ncp_put_super(struct super_block *sb)
  {
         struct ncp_server *server = NCP_SBP(sb);
@@ -792,11 +803,6 @@ static void ncp_put_super(struct super_block *sb)
  
         ncp_stop_tasks(server);
  
-#ifdef CONFIG_NCPFS_NLS
-       /* unload the NLS charsets */
-       unload_nls(server->nls_vol);
-       unload_nls(server->nls_io);
-#endif /* CONFIG_NCPFS_NLS */
         mutex_destroy(&server->rcv.creq_mutex);
         mutex_destroy(&server->root_setup_lock);
         mutex_destroy(&server->mutex);
@@ -813,8 +819,7 @@ static void ncp_put_super(struct super_block *sb)
         vfree(server->rxbuf);
         vfree(server->txbuf);
         vfree(server->packet);
-       sb->s_fs_info = NULL;
-       kfree(server);
+       call_rcu(&server->rcu, delayed_free);
  }
  
  static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h

index c51b2c5..b81e97a 100644 (file)
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -38,7 +38,7 @@ struct ncp_mount_data_kernel {
  };
  
  struct ncp_server {
-
+       struct rcu_head rcu;
         struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of
                                            interest for us later, so we store
                                            it completely. */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 9a8676f..812154a 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -98,9 +98,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
         struct nfs_open_dir_context *ctx;
         struct rpc_cred *cred;
  
-       dfprintk(FILE, "NFS: open dir(%s/%s)\n",
-                       filp->f_path.dentry->d_parent->d_name.name,
-                       filp->f_path.dentry->d_name.name);
+       dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
  
         nfs_inc_stats(inode, NFSIOS_VFSOPEN);
  
@@ -297,11 +295,10 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
                                 if (ctx->duped > 0
                                     && ctx->dup_cookie == *desc->dir_cookie) {
                                         if (printk_ratelimit()) {
-                                               pr_notice("NFS: directory %s/%s contains a readdir loop."
+                                               pr_notice("NFS: directory %pD2 contains a readdir loop."
                                                                 "Please contact your server vendor.  "
                                                                 "The file: %s has duplicate cookie %llu\n",
-                                                               desc->file->f_dentry->d_parent->d_name.name,
-                                                               desc->file->f_dentry->d_name.name,
+                                                               desc->file,
                                                                 array->array[i].string.name,
                                                                 *desc->dir_cookie);
                                         }
@@ -822,9 +819,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
         struct nfs_open_dir_context *dir_ctx = file->private_data;
         int res = 0;
  
-       dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
-                       dentry->d_parent->d_name.name, dentry->d_name.name,
-                       (long long)ctx->pos);
+       dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
+                       file, (long long)ctx->pos);
         nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
  
         /*
@@ -880,22 +876,17 @@ out:
         nfs_unblock_sillyrename(dentry);
         if (res > 0)
                 res = 0;
-       dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
-                       dentry->d_parent->d_name.name, dentry->d_name.name,
-                       res);
+       dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
         return res;
  }
  
  static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
  {
-       struct dentry *dentry = filp->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file_inode(filp);
         struct nfs_open_dir_context *dir_ctx = filp->private_data;
  
-       dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
-                       dentry->d_parent->d_name.name,
-                       dentry->d_name.name,
-                       offset, whence);
+       dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
+                       filp, offset, whence);
  
         mutex_lock(&inode->i_mutex);
         switch (whence) {
@@ -925,15 +916,12 @@ out:
  static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
                          int datasync)
  {
-       struct dentry *dentry = filp->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file_inode(filp); /* the dentry local is no longer needed */
  
-       dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
-                       dentry->d_parent->d_name.name, dentry->d_name.name,
-                       datasync);
+       dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); /* %pD2 takes filp; stands in for the removed parent/name pair */
  
         mutex_lock(&inode->i_mutex);
-       nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
+       nfs_inc_stats(inode, NFSIOS_VFSFSYNC); /* same inode whose i_mutex is held here */
         mutex_unlock(&inode->i_mutex);
         return 0;
  }
@@ -1073,9 +1061,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
         }
  
         if (is_bad_inode(inode)) {
-               dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
-                               __func__, dentry->d_parent->d_name.name,
-                               dentry->d_name.name);
+               dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
+                               __func__, dentry);
                 goto out_bad;
         }
  
@@ -1125,9 +1112,8 @@ out_set_verifier:
         nfs_advise_use_readdirplus(dir);
   out_valid_noent:
         dput(parent);
-       dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
-                       __func__, dentry->d_parent->d_name.name,
-                       dentry->d_name.name);
+       dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
+                       __func__, dentry);
         return 1;
  out_zap_parent:
         nfs_zap_caches(dir);
@@ -1153,18 +1139,16 @@ out_zap_parent:
                 goto out_valid;
  
         dput(parent);
-       dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
-                       __func__, dentry->d_parent->d_name.name,
-                       dentry->d_name.name);
+       dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
+                       __func__, dentry);
         return 0;
  out_error:
         nfs_free_fattr(fattr);
         nfs_free_fhandle(fhandle);
         nfs4_label_free(label);
         dput(parent);
-       dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
-                       __func__, dentry->d_parent->d_name.name,
-                       dentry->d_name.name, error);
+       dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
+                       __func__, dentry, error);
         return error;
  }
  
@@ -1188,16 +1172,14 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
          * eventually need to do something more here.
          */
         if (!inode) {
-               dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n",
-                               __func__, dentry->d_parent->d_name.name,
-                               dentry->d_name.name);
+               dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
+                               __func__, dentry);
                 return 1;
         }
  
         if (is_bad_inode(inode)) {
-               dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
-                               __func__, dentry->d_parent->d_name.name,
-                               dentry->d_name.name);
+               dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
+                               __func__, dentry);
                 return 0;
         }
  
@@ -1212,9 +1194,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
   */
  static int nfs_dentry_delete(const struct dentry *dentry)
  {
-       dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
-               dentry->d_flags);
+       dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
+               dentry, dentry->d_flags);
  
         /* Unhash any dentry with a stale inode */
         if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
@@ -1292,8 +1273,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
         struct nfs4_label *label = NULL;
         int error;
  
-       dfprintk(VFS, "NFS: lookup(%s/%s)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
         nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
  
         res = ERR_PTR(-ENAMETOOLONG);
@@ -1424,8 +1404,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
         /* Expect a negative dentry */
         BUG_ON(dentry->d_inode);
  
-       dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
-                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry);
  
         err = nfs_check_flags(open_flags);
         if (err)
@@ -1614,8 +1594,8 @@ int nfs_create(struct inode *dir, struct dentry *dentry,
         int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
         int error;
  
-       dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
-                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: create(%s/%ld), %pd\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry);
  
         attr.ia_mode = mode;
         attr.ia_valid = ATTR_MODE;
@@ -1641,8 +1621,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
         struct iattr attr;
         int status;
  
-       dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
-                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry);
  
         if (!new_valid_dev(rdev))
                 return -EINVAL;
@@ -1670,8 +1650,8 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         struct iattr attr;
         int error;
  
-       dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
-                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry);
  
         attr.ia_valid = ATTR_MODE;
         attr.ia_mode = mode | S_IFDIR;
@@ -1698,8 +1678,8 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
  {
         int error;
  
-       dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
-                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry);
  
         trace_nfs_rmdir_enter(dir, dentry);
         if (dentry->d_inode) {
@@ -1734,8 +1714,7 @@ static int nfs_safe_remove(struct dentry *dentry)
         struct inode *inode = dentry->d_inode;
         int error = -EBUSY;
                 
-       dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
  
         /* If the dentry was sillyrenamed, we simply call d_delete() */
         if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
@@ -1768,8 +1747,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
         int error;
         int need_rehash = 0;
  
-       dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
-               dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id,
+               dir->i_ino, dentry);
  
         trace_nfs_unlink_enter(dir, dentry);
         spin_lock(&dentry->d_lock);
@@ -1819,8 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
         unsigned int pathlen = strlen(symname);
         int error;
  
-       dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
-               dir->i_ino, dentry->d_name.name, symname);
+       dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id,
+               dir->i_ino, dentry, symname);
  
         if (pathlen > PAGE_SIZE)
                 return -ENAMETOOLONG;
@@ -1842,9 +1821,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
         error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
         trace_nfs_symlink_exit(dir, dentry, error);
         if (error != 0) {
-               dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+               dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n",
                         dir->i_sb->s_id, dir->i_ino,
-                       dentry->d_name.name, symname, error);
+                       dentry, symname, error);
                 d_drop(dentry);
                 __free_page(page);
                 return error;
@@ -1871,9 +1850,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
         struct inode *inode = old_dentry->d_inode;
         int error;
  
-       dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
-               old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
+               old_dentry, dentry);
  
         trace_nfs_link_enter(inode, dir, dentry);
         NFS_PROTO(inode)->return_delegation(inode);
@@ -1921,9 +1899,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct dentry *dentry = NULL, *rehash = NULL;
         int error = -EBUSY;
  
-       dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
-                old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
-                new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
+       dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
+                old_dentry, new_dentry,
                  d_count(new_dentry));
  
         trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c

index 91ff089..d71d66c 100644 (file)
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -124,9 +124,8 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
  ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
  {
  #ifndef CONFIG_NFS_SWAP
-       dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
-                       iocb->ki_filp->f_path.dentry->d_name.name,
-                       (long long) pos, nr_segs);
+       dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
+                       iocb->ki_filp, (long long) pos, nr_segs);
  
         return -EINVAL;
  #else
@@ -909,10 +908,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
         count = iov_length(iov, nr_segs);
         nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
  
-       dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
-               file->f_path.dentry->d_parent->d_name.name,
-               file->f_path.dentry->d_name.name,
-               count, (long long) pos);
+       dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
+               file, count, (long long) pos);
  
         retval = 0;
         if (!count)
@@ -965,10 +962,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
         count = iov_length(iov, nr_segs);
         nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
  
-       dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",
-               file->f_path.dentry->d_parent->d_name.name,
-               file->f_path.dentry->d_name.name,
-               count, (long long) pos);
+       dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
+               file, count, (long long) pos);
  
         retval = generic_write_checks(file, &pos, &count, 0);
         if (retval)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c

index 1e6bfdb..e2fcacf 100644 (file)
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -65,9 +65,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
  {
         int res;
  
-       dprintk("NFS: open file(%s/%s)\n",
-                       filp->f_path.dentry->d_parent->d_name.name,
-                       filp->f_path.dentry->d_name.name);
+       dprintk("NFS: open file(%pD2)\n", filp);
  
         nfs_inc_stats(inode, NFSIOS_VFSOPEN);
         res = nfs_check_flags(filp->f_flags);
@@ -81,9 +79,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
  int
  nfs_file_release(struct inode *inode, struct file *filp)
  {
-       dprintk("NFS: release(%s/%s)\n",
-                       filp->f_path.dentry->d_parent->d_name.name,
-                       filp->f_path.dentry->d_name.name);
+       dprintk("NFS: release(%pD2)\n", filp);
  
         nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
         return nfs_release(inode, filp);
@@ -123,10 +119,8 @@ force_reval:
  
  loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
  {
-       dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
-                       filp->f_path.dentry->d_parent->d_name.name,
-                       filp->f_path.dentry->d_name.name,
-                       offset, whence);
+       dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
+                       filp, offset, whence);
  
         /*
          * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
@@ -150,12 +144,9 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek);
  int
  nfs_file_flush(struct file *file, fl_owner_t id)
  {
-       struct dentry   *dentry = file->f_path.dentry;
-       struct inode    *inode = dentry->d_inode;
+       struct inode    *inode = file_inode(file);
  
-       dprintk("NFS: flush(%s/%s)\n",
-                       dentry->d_parent->d_name.name,
-                       dentry->d_name.name);
+       dprintk("NFS: flush(%pD2)\n", file);
  
         nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
         if ((file->f_mode & FMODE_WRITE) == 0)
@@ -177,15 +168,14 @@ ssize_t
  nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
                 unsigned long nr_segs, loff_t pos)
  {
-       struct dentry * dentry = iocb->ki_filp->f_path.dentry;
-       struct inode * inode = dentry->d_inode;
+       struct inode *inode = file_inode(iocb->ki_filp);
         ssize_t result;
  
         if (iocb->ki_filp->f_flags & O_DIRECT)
                 return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
  
-       dprintk("NFS: read(%s/%s, %lu@%lu)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
+       dprintk("NFS: read(%pD2, %lu@%lu)\n",
+               iocb->ki_filp,
                 (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
  
         result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
@@ -203,13 +193,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
                      struct pipe_inode_info *pipe, size_t count,
                      unsigned int flags)
  {
-       struct dentry *dentry = filp->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file_inode(filp);
         ssize_t res;
  
-       dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
-               (unsigned long) count, (unsigned long long) *ppos);
+       dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
+               filp, (unsigned long) count, (unsigned long long) *ppos);
  
         res = nfs_revalidate_mapping(inode, filp->f_mapping);
         if (!res) {
@@ -224,12 +212,10 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read);
  int
  nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
  {
-       struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file_inode(file);
         int     status;
  
-       dprintk("NFS: mmap(%s/%s)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       dprintk("NFS: mmap(%pD2)\n", file);
  
         /* Note: generic_file_mmap() returns ENOSYS on nommu systems
          *       so we call that before revalidating the mapping
@@ -258,15 +244,12 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
  int
  nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
  {
-       struct dentry *dentry = file->f_path.dentry;
         struct nfs_open_context *ctx = nfs_file_open_context(file);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file_inode(file);
         int have_error, do_resend, status;
         int ret = 0;
  
-       dprintk("NFS: fsync file(%s/%s) datasync %d\n",
-                       dentry->d_parent->d_name.name, dentry->d_name.name,
-                       datasync);
+       dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
  
         nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
         do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
@@ -371,10 +354,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
         struct page *page;
         int once_thru = 0;
  
-       dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
-               file->f_path.dentry->d_parent->d_name.name,
-               file->f_path.dentry->d_name.name,
-               mapping->host->i_ino, len, (long long) pos);
+       dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n",
+               file, mapping->host->i_ino, len, (long long) pos);
  
  start:
         /*
@@ -414,10 +395,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
         struct nfs_open_context *ctx = nfs_file_open_context(file);
         int status;
  
-       dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
-               file->f_path.dentry->d_parent->d_name.name,
-               file->f_path.dentry->d_name.name,
-               mapping->host->i_ino, len, (long long) pos);
+       dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n",
+               file, mapping->host->i_ino, len, (long long) pos);
  
         /*
          * Zero any uninitialised parts of the page, and then mark the page
@@ -601,22 +580,21 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
  {
         struct page *page = vmf->page;
         struct file *filp = vma->vm_file;
-       struct dentry *dentry = filp->f_path.dentry;
+       struct inode *inode = file_inode(filp);
         unsigned pagelen;
         int ret = VM_FAULT_NOPAGE;
         struct address_space *mapping;
  
-       dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
-               filp->f_mapping->host->i_ino,
+       dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n",
+               filp, filp->f_mapping->host->i_ino,
                 (long long)page_offset(page));
  
         /* make sure the cache has finished storing the page */
-       nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+       nfs_fscache_wait_on_page_write(NFS_I(inode), page);
  
         lock_page(page);
         mapping = page_file_mapping(page);
-       if (mapping != dentry->d_inode->i_mapping)
+       if (mapping != inode->i_mapping)
                 goto out_unlock;
  
         wait_on_page_writeback(page);
@@ -659,22 +637,21 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
  ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
                        unsigned long nr_segs, loff_t pos)
  {
-       struct dentry * dentry = iocb->ki_filp->f_path.dentry;
-       struct inode * inode = dentry->d_inode;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
         unsigned long written = 0;
         ssize_t result;
         size_t count = iov_length(iov, nr_segs);
  
-       result = nfs_key_timeout_notify(iocb->ki_filp, inode);
+       result = nfs_key_timeout_notify(file, inode);
         if (result)
                 return result;
  
-       if (iocb->ki_filp->f_flags & O_DIRECT)
+       if (file->f_flags & O_DIRECT)
                 return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
  
-       dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
-               (unsigned long) count, (long long) pos);
+       dprintk("NFS: write(%pD2, %lu@%Ld)\n",
+               file, (unsigned long) count, (long long) pos);
  
         result = -EBUSY;
         if (IS_SWAPFILE(inode))
@@ -682,8 +659,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
         /*
          * O_APPEND implies that we must revalidate the file length.
          */
-       if (iocb->ki_filp->f_flags & O_APPEND) {
-               result = nfs_revalidate_file_size(inode, iocb->ki_filp);
+       if (file->f_flags & O_APPEND) {
+               result = nfs_revalidate_file_size(inode, file);
                 if (result)
                         goto out;
         }
@@ -697,8 +674,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
                 written = result;
  
         /* Return error values for O_DSYNC and IS_SYNC() */
-       if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
-               int err = vfs_fsync(iocb->ki_filp, 0);
+       if (result >= 0 && nfs_need_sync_write(file, inode)) {
+               int err = vfs_fsync(file, 0);
                 if (err < 0)
                         result = err;
         }
@@ -717,14 +694,12 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
                               struct file *filp, loff_t *ppos,
                               size_t count, unsigned int flags)
  {
-       struct dentry *dentry = filp->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file_inode(filp);
         unsigned long written = 0;
         ssize_t ret;
  
-       dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
-               (unsigned long) count, (unsigned long long) *ppos);
+       dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
+               filp, (unsigned long) count, (unsigned long long) *ppos);
  
         /*
          * The combination of splice and an O_APPEND destination is disallowed.
@@ -883,10 +858,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
         int ret = -ENOLCK;
         int is_local = 0;
  
-       dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
-                       filp->f_path.dentry->d_parent->d_name.name,
-                       filp->f_path.dentry->d_name.name,
-                       fl->fl_type, fl->fl_flags,
+       dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
+                       filp, fl->fl_type, fl->fl_flags,
                         (long long)fl->fl_start, (long long)fl->fl_end);
  
         nfs_inc_stats(inode, NFSIOS_VFSLOCK);
@@ -923,10 +896,8 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
         struct inode *inode = filp->f_mapping->host;
         int is_local = 0;
  
-       dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
-                       filp->f_path.dentry->d_parent->d_name.name,
-                       filp->f_path.dentry->d_name.name,
-                       fl->fl_type, fl->fl_flags);
+       dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
+                       filp, fl->fl_type, fl->fl_flags);
  
         if (!(fl->fl_flags & FL_FLOCK))
                 return -ENOLCK;
@@ -960,9 +931,7 @@ EXPORT_SYMBOL_GPL(nfs_flock);
   */
  int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
  {
-       dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
-                       file->f_path.dentry->d_parent->d_name.name,
-                       file->f_path.dentry->d_name.name, arg);
+       dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg);
         return -EINVAL;
  }
  EXPORT_SYMBOL_GPL(nfs_setlease);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c

index 348b535..b5a0afc 100644 (file)
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -253,9 +253,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
  
         dprintk("--> nfs_do_submount()\n");
  
-       dprintk("%s: submounting on %s/%s\n", __func__,
-                       dentry->d_parent->d_name.name,
-                       dentry->d_name.name);
+       dprintk("%s: submounting on %pd2\n", __func__,
+                       dentry);
         if (page == NULL)
                 goto out;
         devname = nfs_devname(dentry, page, PAGE_SIZE);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c

index 90cb10d..01b6f6a 100644 (file)
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -321,7 +321,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
         umode_t mode = sattr->ia_mode;
         int status = -ENOMEM;
  
-       dprintk("NFS call  create %s\n", dentry->d_name.name);
+       dprintk("NFS call  create %pd\n", dentry);
  
         data = nfs3_alloc_createdata();
         if (data == NULL)
@@ -548,7 +548,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
         if (len > NFS3_MAXPATHLEN)
                 return -ENAMETOOLONG;
  
-       dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+       dprintk("NFS call  symlink %pd\n", dentry);
  
         data = nfs3_alloc_createdata();
         if (data == NULL)
@@ -576,7 +576,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
         umode_t mode = sattr->ia_mode;
         int status = -ENOMEM;
  
-       dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+       dprintk("NFS call  mkdir %pd\n", dentry);
  
         sattr->ia_mode &= ~current_umask();
  
@@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
         umode_t mode = sattr->ia_mode;
         int status = -ENOMEM;
  
-       dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
+       dprintk("NFS call  mknod %pd %u:%u\n", dentry,
                         MAJOR(rdev), MINOR(rdev));
  
         sattr->ia_mode &= ~current_umask();
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c

index 1f01b55..8de3407 100644 (file)
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -31,9 +31,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
          * -EOPENSTALE.  The VFS will retry the lookup/create/open.
          */
  
-       dprintk("NFS: open file(%s/%s)\n",
-               dentry->d_parent->d_name.name,
-               dentry->d_name.name);
+       dprintk("NFS: open file(%pd2)\n", dentry);
  
         if ((openflags & O_ACCMODE) == 3)
                 openflags--;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c

index c08cbf4..4e7f05d 100644 (file)
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -292,8 +292,7 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
         if (locations == NULL || locations->nlocations <= 0)
                 goto out;
  
-       dprintk("%s: referral at %s/%s\n", __func__,
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       dprintk("%s: referral at %pd2\n", __func__, dentry);
  
         page = (char *) __get_free_page(GFP_USER);
         if (!page)
@@ -357,8 +356,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
         mnt = ERR_PTR(-ENOENT);
  
         parent = dget_parent(dentry);
-       dprintk("%s: getting locations for %s/%s\n",
-               __func__, parent->d_name.name, dentry->d_name.name);
+       dprintk("%s: getting locations for %pd2\n",
+               __func__, dentry);
  
         err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
         dput(parent);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 5ab33c0..659990c 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3771,9 +3771,8 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
         };
         int                     status;
  
-       dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__,
-                       dentry->d_parent->d_name.name,
-                       dentry->d_name.name,
+       dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
+                       dentry,
                         (unsigned long long)cookie);
         nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
         res.pgbase = args.pgbase;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c

index a8f57c7..fddbba2 100644 (file)
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -235,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
         };
         int status = -ENOMEM;
  
-       dprintk("NFS call  create %s\n", dentry->d_name.name);
+       dprintk("NFS call  create %pd\n", dentry);
         data = nfs_alloc_createdata(dir, dentry, sattr);
         if (data == NULL)
                 goto out;
@@ -265,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
         umode_t mode;
         int status = -ENOMEM;
  
-       dprintk("NFS call  mknod %s\n", dentry->d_name.name);
+       dprintk("NFS call  mknod %pd\n", dentry);
  
         mode = sattr->ia_mode;
         if (S_ISFIFO(mode)) {
@@ -423,7 +423,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
         };
         int status = -ENAMETOOLONG;
  
-       dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+       dprintk("NFS call  symlink %pd\n", dentry);
  
         if (len > NFS2_MAXPATHLEN)
                 goto out;
@@ -462,7 +462,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
         };
         int status = -ENOMEM;
  
-       dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+       dprintk("NFS call  mkdir %pd\n", dentry);
         data = nfs_alloc_createdata(dir, dentry, sattr);
         if (data == NULL)
                 goto out;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c

index 0c29b1b..11d7894 100644 (file)
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -495,9 +495,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
         struct rpc_task *task;
         int            error = -EBUSY;
  
-       dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name,
-               d_count(dentry));
+       dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
+               dentry, d_count(dentry));
         nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
  
         /*
@@ -521,8 +520,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
                                 SILLYNAME_FILEID_LEN, fileid,
                                 SILLYNAME_COUNTER_LEN, sillycounter);
  
-               dfprintk(VFS, "NFS: trying to rename %s to %s\n",
-                               dentry->d_name.name, silly);
+               dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
+                               dentry, silly);
  
                 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
                 /*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index ac1dc33..c1d5482 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -954,10 +954,8 @@ int nfs_updatepage(struct file *file, struct page *page,
  
         nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
  
-       dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n",
-               file->f_path.dentry->d_parent->d_name.name,
-               file->f_path.dentry->d_name.name, count,
-               (long long)(page_file_offset(page) + offset));
+       dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
+               file, count, (long long)(page_file_offset(page) + offset));
  
         if (nfs_can_extend_write(file, page, inode)) {
                 count = max(count + offset, nfs_page_length(page));
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c

index e0a65a9..9c271f4 100644 (file)
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
  
         status = vfs_rmdir(parent->d_inode, child);
         if (status)
-               printk("failed to remove client recovery directory %s\n",
-                               child->d_name.name);
+               printk("failed to remove client recovery directory %pd\n",
+                               child);
         /* Keep trying, success or failure: */
         return 0;
  }
@@ -410,15 +410,15 @@ out:
         nfs4_release_reclaim(nn);
         if (status)
                 printk("nfsd4: failed to purge old clients from recovery"
-                       " directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
+                       " directory %pD\n", nn->rec_file);
  }
  
  static int
  load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
  {
         if (child->d_name.len != HEXDIR_LEN - 1) {
-               printk("nfsd4: illegal name %s in recovery directory\n",
-                               child->d_name.name);
+               printk("nfsd4: illegal name %pd in recovery directory\n",
+                               child);
                 /* Keep trying; maybe the others are OK: */
                 return 0;
         }
@@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) {
         status = nfsd4_list_rec_dir(load_recdir, nn);
         if (status)
                 printk("nfsd4: failed loading clients from recovery"
-                       " directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
+                       " directory %pD\n", nn->rec_file);
         return status;
  }
  
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c

index 0874998..f36a30a 100644 (file)
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3008,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
                 return NULL;
         locks_init_lock(fl);
         fl->fl_lmops = &nfsd_lease_mng_ops;
-       fl->fl_flags = FL_LEASE;
+       fl->fl_flags = FL_DELEG;
         fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
         fl->fl_end = OFFSET_MAX;
         fl->fl_owner = (fl_owner_t)(dp->dl_file);
@@ -3843,9 +3843,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         struct nfs4_ol_stateid *stp;
         struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
  
-       dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
-                       (int)cstate->current_fh.fh_dentry->d_name.len,
-                       cstate->current_fh.fh_dentry->d_name.name);
+       dprintk("NFSD: nfsd4_open_confirm on file %pd\n",
+                       cstate->current_fh.fh_dentry);
  
         status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
         if (status)
@@ -3922,9 +3921,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
         struct nfs4_ol_stateid *stp;
         struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
  
-       dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 
-                       (int)cstate->current_fh.fh_dentry->d_name.len,
-                       cstate->current_fh.fh_dentry->d_name.name);
+       dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", 
+                       cstate->current_fh.fh_dentry);
  
         /* We don't yet support WANT bits: */
         if (od->od_deleg_want)
@@ -3980,9 +3978,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         struct net *net = SVC_NET(rqstp);
         struct nfsd_net *nn = net_generic(net, nfsd_net_id);
  
-       dprintk("NFSD: nfsd4_close on file %.*s\n", 
-                       (int)cstate->current_fh.fh_dentry->d_name.len,
-                       cstate->current_fh.fh_dentry->d_name.name);
+       dprintk("NFSD: nfsd4_close on file %pd\n", 
+                       cstate->current_fh.fh_dentry);
  
         nfs4_lock_state();
         status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c

index 814afaa..3d0e15a 100644 (file)
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
                 tdentry = parent;
         }
         if (tdentry != exp->ex_path.dentry)
-               dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name);
+               dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry);
         rv = (tdentry == exp->ex_path.dentry);
         dput(tdentry);
         return rv;
@@ -253,8 +253,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
  
         if (S_ISDIR(dentry->d_inode->i_mode) &&
                         (dentry->d_flags & DCACHE_DISCONNECTED)) {
-               printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
-                               dentry->d_parent->d_name.name, dentry->d_name.name);
+               printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n",
+                               dentry);
         }
  
         fhp->fh_dentry = dentry;
@@ -361,10 +361,9 @@ skip_pseudoflavor_check:
         error = nfsd_permission(rqstp, exp, dentry, access);
  
         if (error) {
-               dprintk("fh_verify: %s/%s permission failure, "
+               dprintk("fh_verify: %pd2 permission failure, "
                         "acc=%x, error=%d\n",
-                       dentry->d_parent->d_name.name,
-                       dentry->d_name.name,
+                       dentry,
                         access, ntohl(error));
         }
  out:
@@ -514,14 +513,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
          */
  
         struct inode * inode = dentry->d_inode;
-       struct dentry *parent = dentry->d_parent;
         __u32 *datap;
         dev_t ex_dev = exp_sb(exp)->s_dev;
  
-       dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
+       dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
                 MAJOR(ex_dev), MINOR(ex_dev),
                 (long) exp->ex_path.dentry->d_inode->i_ino,
-               parent->d_name.name, dentry->d_name.name,
+               dentry,
                 (inode ? inode->i_ino : 0));
  
         /* Choose filehandle version and fsid type based on
@@ -534,13 +532,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
                 fh_put(ref_fh);
  
         if (fhp->fh_locked || fhp->fh_dentry) {
-               printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
-                      parent->d_name.name, dentry->d_name.name);
+               printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
+                      dentry);
         }
         if (fhp->fh_maxsize < NFS_FHSIZE)
-               printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n",
+               printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n",
                        fhp->fh_maxsize,
-                      parent->d_name.name, dentry->d_name.name);
+                      dentry);
  
         fhp->fh_dentry = dget(dentry); /* our internal copy */
         fhp->fh_export = exp;
@@ -613,8 +611,8 @@ out_bad:
         printk(KERN_ERR "fh_update: fh not verified!\n");
         goto out;
  out_negative:
-       printk(KERN_ERR "fh_update: %s/%s still negative!\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name);
+       printk(KERN_ERR "fh_update: %pd2 still negative!\n",
+               dentry);
         goto out;
  }
  
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h

index e5e6707..4775bc4 100644 (file)
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -173,8 +173,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
         BUG_ON(!dentry);
  
         if (fhp->fh_locked) {
-               printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
-                       dentry->d_parent->d_name.name, dentry->d_name.name);
+               printk(KERN_WARNING "fh_lock: %pd2 already locked!\n",
+                       dentry);
                 return;
         }
  
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index c827acb..94b5f5d 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -427,7 +427,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                         goto out_nfserr;
                 fh_lock(fhp);
  
-               host_err = notify_change(dentry, iap);
+               host_err = notify_change(dentry, iap, NULL);
                 err = nfserrno(host_err);
                 fh_unlock(fhp);
         }
@@ -988,7 +988,11 @@ static void kill_suid(struct dentry *dentry)
         ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
  
         mutex_lock(&dentry->d_inode->i_mutex);
-       notify_change(dentry, &ia);
+       /*
+        * Note we call this on write, so notify_change will not
+        * encounter any conflicting delegations:
+        */
+       notify_change(dentry, &ia, NULL);
         mutex_unlock(&dentry->d_inode->i_mutex);
  }
  
@@ -1317,9 +1321,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 if (!fhp->fh_locked) {
                         /* not actually possible */
                         printk(KERN_ERR
-                               "nfsd_create: parent %s/%s not locked!\n",
-                               dentry->d_parent->d_name.name,
-                               dentry->d_name.name);
+                               "nfsd_create: parent %pd2 not locked!\n",
+                               dentry);
                         err = nfserr_io;
                         goto out;
                 }
@@ -1329,8 +1332,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
          */
         err = nfserr_exist;
         if (dchild->d_inode) {
-               dprintk("nfsd_create: dentry %s/%s not negative!\n",
-                       dentry->d_name.name, dchild->d_name.name);
+               dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
+                       dentry, dchild);
                 goto out; 
         }
  
@@ -1737,7 +1740,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
                 err = nfserrno(host_err);
                 goto out_dput;
         }
-       host_err = vfs_link(dold, dirp, dnew);
+       host_err = vfs_link(dold, dirp, dnew, NULL);
         if (!host_err) {
                 err = nfserrno(commit_metadata(ffhp));
                 if (!err)
@@ -1838,7 +1841,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                 if (host_err)
                         goto out_dput_new;
         }
-       host_err = vfs_rename(fdir, odentry, tdir, ndentry);
+       host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
         if (!host_err) {
                 host_err = commit_metadata(tfhp);
                 if (!host_err)
@@ -1911,7 +1914,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         if (host_err)
                 goto out_put;
         if (type != S_IFDIR)
-               host_err = vfs_unlink(dirp, rdentry);
+               host_err = vfs_unlink(dirp, rdentry, NULL);
         else
                 host_err = vfs_rmdir(dirp, rdentry);
         if (!host_err)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c

index 2778b02..ffb9b36 100644 (file)
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -55,7 +55,7 @@
   *
   * Return 1 if the attributes match and 0 if not.
   *
- * NOTE: This function runs with the inode->i_lock spin lock held so it is not
+ * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
   * allowed to sleep.
   */
  int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c

index f87f9bd..f29a90f 100644 (file)
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -386,19 +386,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
         u32 generation = 0;
  
         status = -EINVAL;
-       if (inode == NULL || inode->i_sb == NULL) {
-               mlog(ML_ERROR, "bad inode\n");
-               return status;
-       }
         sb = inode->i_sb;
         osb = OCFS2_SB(sb);
  
-       if (!args) {
-               mlog(ML_ERROR, "bad inode args\n");
-               make_bad_inode(inode);
-               return status;
-       }
-
         /*
          * To improve performance of cold-cache inode stats, we take
          * the cluster lock here if possible.
diff --git a/fs/open.c b/fs/open.c

index d420331..4b3e1ed 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -57,7 +57,8 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
                 newattrs.ia_valid |= ret | ATTR_FORCE;
  
         mutex_lock(&dentry->d_inode->i_mutex);
-       ret = notify_change(dentry, &newattrs);
+       /* Note any delegations or leases have already been broken: */
+       ret = notify_change(dentry, &newattrs, NULL);
         mutex_unlock(&dentry->d_inode->i_mutex);
         return ret;
  }
@@ -464,21 +465,28 @@ out:
  static int chmod_common(struct path *path, umode_t mode)
  {
         struct inode *inode = path->dentry->d_inode;
+       struct inode *delegated_inode = NULL;
         struct iattr newattrs;
         int error;
  
         error = mnt_want_write(path->mnt);
         if (error)
                 return error;
+retry_deleg:
         mutex_lock(&inode->i_mutex);
         error = security_path_chmod(path, mode);
         if (error)
                 goto out_unlock;
         newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
         newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-       error = notify_change(path->dentry, &newattrs);
+       error = notify_change(path->dentry, &newattrs, &delegated_inode);
  out_unlock:
         mutex_unlock(&inode->i_mutex);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         mnt_drop_write(path->mnt);
         return error;
  }
@@ -522,6 +530,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
  static int chown_common(struct path *path, uid_t user, gid_t group)
  {
         struct inode *inode = path->dentry->d_inode;
+       struct inode *delegated_inode = NULL;
         int error;
         struct iattr newattrs;
         kuid_t uid;
@@ -546,12 +555,17 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
         if (!S_ISDIR(inode->i_mode))
                 newattrs.ia_valid |=
                         ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+retry_deleg:
         mutex_lock(&inode->i_mutex);
         error = security_path_chown(path, uid, gid);
         if (!error)
-               error = notify_change(path->dentry, &newattrs);
+               error = notify_change(path->dentry, &newattrs, &delegated_inode);
         mutex_unlock(&inode->i_mutex);
-
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         return error;
  }
  
@@ -685,7 +699,6 @@ static int do_dentry_open(struct file *f,
         }
  
         f->f_mapping = inode->i_mapping;
-       file_sb_list_add(f, inode->i_sb);
  
         if (unlikely(f->f_mode & FMODE_PATH)) {
                 f->f_op = &empty_fops;
@@ -693,6 +706,10 @@ static int do_dentry_open(struct file *f,
         }
  
         f->f_op = fops_get(inode->i_fop);
+       if (unlikely(WARN_ON(!f->f_op))) {
+               error = -ENODEV;
+               goto cleanup_all;
+       }
  
         error = security_file_open(f, cred);
         if (error)
@@ -702,7 +719,7 @@ static int do_dentry_open(struct file *f,
         if (error)
                 goto cleanup_all;
  
-       if (!open && f->f_op)
+       if (!open)
                 open = f->f_op->open;
         if (open) {
                 error = open(inode, f);
@@ -720,7 +737,6 @@ static int do_dentry_open(struct file *f,
  
  cleanup_all:
         fops_put(f->f_op);
-       file_sb_list_del(f);
         if (f->f_mode & FMODE_WRITE) {
                 put_write_access(inode);
                 if (!special_file(inode->i_mode)) {
@@ -1023,7 +1039,7 @@ int filp_close(struct file *filp, fl_owner_t id)
                 return 0;
         }
  
-       if (filp->f_op && filp->f_op->flush)
+       if (filp->f_op->flush)
                 retval = filp->f_op->flush(filp, id);
  
         if (likely(!(filp->f_mode & FMODE_PATH))) {
diff --git a/fs/pnode.c b/fs/pnode.c

index 9af0df1..c7221bb 100644 (file)
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -264,12 +264,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
                 prev_src_mnt  = child;
         }
  out:
-       br_write_lock(&vfsmount_lock);
+       lock_mount_hash();
         while (!list_empty(&tmp_list)) {
                 child = list_first_entry(&tmp_list, struct mount, mnt_hash);
                 umount_tree(child, 0);
         }
-       br_write_unlock(&vfsmount_lock);
+       unlock_mount_hash();
         return ret;
  }
  
@@ -278,8 +278,7 @@ out:
   */
  static inline int do_refcount_check(struct mount *mnt, int count)
  {
-       int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
-       return (mycount > count);
+       return mnt_get_count(mnt) > count;
  }
  
  /*
@@ -311,7 +310,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
  
         for (m = propagation_next(parent, parent); m;
                         m = propagation_next(m, parent)) {
-               child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0);
+               child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
                 if (child && list_empty(&child->mnt_mounts) &&
                     (ret = do_refcount_check(child, 1)))
                         break;
@@ -333,8 +332,8 @@ static void __propagate_umount(struct mount *mnt)
         for (m = propagation_next(parent, parent); m;
                         m = propagation_next(m, parent)) {
  
-               struct mount *child = __lookup_mnt(&m->mnt,
-                                       mnt->mnt_mountpoint, 0);
+               struct mount *child = __lookup_mnt_last(&m->mnt,
+                                               mnt->mnt_mountpoint);
                 /*
                  * umount the child only if the child has no
                  * other children
diff --git a/fs/proc/self.c b/fs/proc/self.c

index 6b6a993..ffeb202 100644 (file)
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
         return NULL;
  }
  
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
-                               void *cookie)
-{
-       char *s = nd_get_link(nd);
-       if (!IS_ERR(s))
-               kfree(s);
-}
-
  static const struct inode_operations proc_self_inode_operations = {
         .readlink       = proc_self_readlink,
         .follow_link    = proc_self_follow_link,
-       .put_link       = proc_self_put_link,
+       .put_link       = kfree_put_link,
  };
  
  static unsigned self_inum;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c

index 5fe34c3..439406e 100644 (file)
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -20,15 +20,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
         struct proc_mounts *p = proc_mounts(file->private_data);
         struct mnt_namespace *ns = p->ns;
         unsigned res = POLLIN | POLLRDNORM;
+       int event;
  
         poll_wait(file, &p->ns->poll, wait);
  
-       br_read_lock(&vfsmount_lock);
-       if (p->m.poll_event != ns->event) {
-               p->m.poll_event = ns->event;
+       event = ACCESS_ONCE(ns->event);
+       if (p->m.poll_event != event) {
+               p->m.poll_event = event;
                 res |= POLLERR | POLLPRI;
         }
-       br_read_unlock(&vfsmount_lock);
  
         return res;
  }
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c

index d024505..e62c818 100644 (file)
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -60,10 +60,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
         struct buffer_head *bh;
  
         *res_dir = NULL;
-       if (!dir->i_sb) {
-               printk(KERN_WARNING "qnx4: no superblock on dir.\n");
-               return NULL;
-       }
         bh = NULL;
         block = offset = blkofs = 0;
         while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {
diff --git a/fs/quota/quota.c b/fs/quota/quota.c

index dea86e8..2b363e2 100644 (file)
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -117,6 +117,7 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
  
  static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
  {
+       memset(dst, 0, sizeof(*dst));
         dst->dqb_bhardlimit = src->d_blk_hardlimit;
         dst->dqb_bsoftlimit = src->d_blk_softlimit;
         dst->dqb_curspace = src->d_bcount;
diff --git a/fs/read_write.c b/fs/read_write.c

index e3cd280..58e440d 100644 (file)
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -257,7 +257,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
  
         fn = no_llseek;
         if (file->f_mode & FMODE_LSEEK) {
-               if (file->f_op && file->f_op->llseek)
+               if (file->f_op->llseek)
                         fn = file->f_op->llseek;
         }
         return fn(file, offset, whence);
@@ -384,7 +384,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  
         if (!(file->f_mode & FMODE_READ))
                 return -EBADF;
-       if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
+       if (!file->f_op->read && !file->f_op->aio_read)
                 return -EINVAL;
         if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
                 return -EFAULT;
@@ -433,7 +433,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
         const char __user *p;
         ssize_t ret;
  
-       if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
+       if (!file->f_op->write && !file->f_op->aio_write)
                 return -EINVAL;
  
         old_fs = get_fs();
@@ -460,7 +460,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
  
         if (!(file->f_mode & FMODE_WRITE))
                 return -EBADF;
-       if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
+       if (!file->f_op->write && !file->f_op->aio_write)
                 return -EINVAL;
         if (unlikely(!access_ok(VERIFY_READ, buf, count)))
                 return -EFAULT;
@@ -727,11 +727,6 @@ static ssize_t do_readv_writev(int type, struct file *file,
         io_fn_t fn;
         iov_fn_t fnv;
  
-       if (!file->f_op) {
-               ret = -EINVAL;
-               goto out;
-       }
-
         ret = rw_copy_check_uvector(type, uvector, nr_segs,
                                     ARRAY_SIZE(iovstack), iovstack, &iov);
         if (ret <= 0)
@@ -778,7 +773,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  {
         if (!(file->f_mode & FMODE_READ))
                 return -EBADF;
-       if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+       if (!file->f_op->aio_read && !file->f_op->read)
                 return -EINVAL;
  
         return do_readv_writev(READ, file, vec, vlen, pos);
@@ -791,7 +786,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  {
         if (!(file->f_mode & FMODE_WRITE))
                 return -EBADF;
-       if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+       if (!file->f_op->aio_write && !file->f_op->write)
                 return -EINVAL;
  
         return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -906,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
         io_fn_t fn;
         iov_fn_t fnv;
  
-       ret = -EINVAL;
-       if (!file->f_op)
-               goto out;
-
         ret = -EFAULT;
         if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
                 goto out;
@@ -965,7 +956,7 @@ static size_t compat_readv(struct file *file,
                 goto out;
  
         ret = -EINVAL;
-       if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+       if (!file->f_op->aio_read && !file->f_op->read)
                 goto out;
  
         ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1032,7 +1023,7 @@ static size_t compat_writev(struct file *file,
                 goto out;
  
         ret = -EINVAL;
-       if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+       if (!file->f_op->aio_write && !file->f_op->write)
                 goto out;
  
         ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
diff --git a/fs/readdir.c b/fs/readdir.c

index 93d71e5..5b53d99 100644 (file)
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
  {
         struct inode *inode = file_inode(file);
         int res = -ENOTDIR;
-       if (!file->f_op || !file->f_op->iterate)
+       if (!file->f_op->iterate)
                 goto out;
  
         res = security_file_permission(file, MAY_READ);
diff --git a/fs/select.c b/fs/select.c

index dfd5cb1..467bb1c 100644 (file)
--- a/fs/select.c
+++ b/fs/select.c
@@ -454,7 +454,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
                                         const struct file_operations *f_op;
                                         f_op = f.file->f_op;
                                         mask = DEFAULT_POLLMASK;
-                                       if (f_op && f_op->poll) {
+                                       if (f_op->poll) {
                                                 wait_key_set(wait, in, out,
                                                              bit, busy_flag);
                                                 mask = (*f_op->poll)(f.file, wait);
@@ -761,7 +761,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
                 mask = POLLNVAL;
                 if (f.file) {
                         mask = DEFAULT_POLLMASK;
-                       if (f.file->f_op && f.file->f_op->poll) {
+                       if (f.file->f_op->poll) {
                                 pwait->_key = pollfd->events|POLLERR|POLLHUP;
                                 pwait->_key |= busy_flag;
                                 mask = f.file->f_op->poll(f.file, pwait);
diff --git a/fs/splice.c b/fs/splice.c

index 3b7ee65..46a08f7 100644 (file)
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -695,7 +695,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
         loff_t pos = sd->pos;
         int more;
  
-       if (!likely(file->f_op && file->f_op->sendpage))
+       if (!likely(file->f_op->sendpage))
                 return -EINVAL;
  
         more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
@@ -1099,7 +1099,7 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
         ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
                                 loff_t *, size_t, unsigned int);
  
-       if (out->f_op && out->f_op->splice_write)
+       if (out->f_op->splice_write)
                 splice_write = out->f_op->splice_write;
         else
                 splice_write = default_file_splice_write;
@@ -1125,7 +1125,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
         if (unlikely(ret < 0))
                 return ret;
  
-       if (in->f_op && in->f_op->splice_read)
+       if (in->f_op->splice_read)
                 splice_read = in->f_op->splice_read;
         else
                 splice_read = default_file_splice_read;
diff --git a/fs/stat.c b/fs/stat.c

index d0ea7ef..ae0c3ce 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,14 +37,21 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
  
  EXPORT_SYMBOL(generic_fillattr);
  
-int vfs_getattr(struct path *path, struct kstat *stat)
+/**
+ * vfs_getattr_nosec - getattr without security checks
+ * @path: file to get attributes from
+ * @stat: structure to return attributes in
+ *
+ * Get attributes without calling security_inode_getattr.
+ *
+ * Currently the only caller other than vfs_getattr is internal to the
+ * filehandle lookup code, which uses only the inode number and returns
+ * no attributes to any user.  Any other code probably wants
+ * vfs_getattr.
+ */
+int vfs_getattr_nosec(struct path *path, struct kstat *stat)
  {
         struct inode *inode = path->dentry->d_inode;
-       int retval;
-
-       retval = security_inode_getattr(path->mnt, path->dentry);
-       if (retval)
-               return retval;
  
         if (inode->i_op->getattr)
                 return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -53,6 +60,18 @@ int vfs_getattr(struct path *path, struct kstat *stat)
         return 0;
  }
  
+EXPORT_SYMBOL(vfs_getattr_nosec);
+
+int vfs_getattr(struct path *path, struct kstat *stat)
+{
+       int retval;
+
+       retval = security_inode_getattr(path->mnt, path->dentry);
+       if (retval)
+               return retval;
+       return vfs_getattr_nosec(path, stat);
+}
+
  EXPORT_SYMBOL(vfs_getattr);
  
  int vfs_fstat(unsigned int fd, struct kstat *stat)
diff --git a/fs/super.c b/fs/super.c

index 0225c20..e5f6c2c 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -129,33 +129,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
         return total_objects;
  }
  
-static int init_sb_writers(struct super_block *s, struct file_system_type *type)
-{
-       int err;
-       int i;
-
-       for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-               err = percpu_counter_init(&s->s_writers.counter[i], 0);
-               if (err < 0)
-                       goto err_out;
-               lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-                                &type->s_writers_key[i], 0);
-       }
-       init_waitqueue_head(&s->s_writers.wait);
-       init_waitqueue_head(&s->s_writers.wait_unfrozen);
-       return 0;
-err_out:
-       while (--i >= 0)
-               percpu_counter_destroy(&s->s_writers.counter[i]);
-       return err;
-}
-
-static void destroy_sb_writers(struct super_block *s)
+/**
+ *     destroy_super   -       frees a superblock
+ *     @s: superblock to free
+ *
+ *     Frees a superblock.
+ */
+static void destroy_super(struct super_block *s)
  {
         int i;
-
+       list_lru_destroy(&s->s_dentry_lru);
+       list_lru_destroy(&s->s_inode_lru);
         for (i = 0; i < SB_FREEZE_LEVELS; i++)
                 percpu_counter_destroy(&s->s_writers.counter[i]);
+       security_sb_free(s);
+       WARN_ON(!list_empty(&s->s_mounts));
+       kfree(s->s_subtype);
+       kfree(s->s_options);
+       kfree_rcu(s, rcu);
  }
  
  /**
@@ -170,111 +161,74 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
  {
         struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
         static const struct super_operations default_op;
+       int i;
  
-       if (s) {
-               if (security_sb_alloc(s))
-                       goto out_free_sb;
+       if (!s)
+               return NULL;
  
-#ifdef CONFIG_SMP
-               s->s_files = alloc_percpu(struct list_head);
-               if (!s->s_files)
-                       goto err_out;
-               else {
-                       int i;
+       if (security_sb_alloc(s))
+               goto fail;
  
-                       for_each_possible_cpu(i)
-                               INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
-               }
-#else
-               INIT_LIST_HEAD(&s->s_files);
-#endif
-               if (init_sb_writers(s, type))
-                       goto err_out;
-               s->s_flags = flags;
-               s->s_bdi = &default_backing_dev_info;
-               INIT_HLIST_NODE(&s->s_instances);
-               INIT_HLIST_BL_HEAD(&s->s_anon);
-               INIT_LIST_HEAD(&s->s_inodes);
-
-               if (list_lru_init(&s->s_dentry_lru))
-                       goto err_out;
-               if (list_lru_init(&s->s_inode_lru))
-                       goto err_out_dentry_lru;
-
-               INIT_LIST_HEAD(&s->s_mounts);
-               init_rwsem(&s->s_umount);
-               lockdep_set_class(&s->s_umount, &type->s_umount_key);
-               /*
-                * sget() can have s_umount recursion.
-                *
-                * When it cannot find a suitable sb, it allocates a new
-                * one (this one), and tries again to find a suitable old
-                * one.
-                *
-                * In case that succeeds, it will acquire the s_umount
-                * lock of the old one. Since these are clearly distrinct
-                * locks, and this object isn't exposed yet, there's no
-                * risk of deadlocks.
-                *
-                * Annotate this by putting this lock in a different
-                * subclass.
-                */
-               down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
-               s->s_count = 1;
-               atomic_set(&s->s_active, 1);
-               mutex_init(&s->s_vfs_rename_mutex);
-               lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
-               mutex_init(&s->s_dquot.dqio_mutex);
-               mutex_init(&s->s_dquot.dqonoff_mutex);
-               init_rwsem(&s->s_dquot.dqptr_sem);
-               s->s_maxbytes = MAX_NON_LFS;
-               s->s_op = &default_op;
-               s->s_time_gran = 1000000000;
-               s->cleancache_poolid = -1;
-
-               s->s_shrink.seeks = DEFAULT_SEEKS;
-               s->s_shrink.scan_objects = super_cache_scan;
-               s->s_shrink.count_objects = super_cache_count;
-               s->s_shrink.batch = 1024;
-               s->s_shrink.flags = SHRINKER_NUMA_AWARE;
+       for (i = 0; i < SB_FREEZE_LEVELS; i++) {
+               if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
+                       goto fail;
+               lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
+                                &type->s_writers_key[i], 0);
         }
-out:
+       init_waitqueue_head(&s->s_writers.wait);
+       init_waitqueue_head(&s->s_writers.wait_unfrozen);
+       s->s_flags = flags;
+       s->s_bdi = &default_backing_dev_info;
+       INIT_HLIST_NODE(&s->s_instances);
+       INIT_HLIST_BL_HEAD(&s->s_anon);
+       INIT_LIST_HEAD(&s->s_inodes);
+
+       if (list_lru_init(&s->s_dentry_lru))
+               goto fail;
+       if (list_lru_init(&s->s_inode_lru))
+               goto fail;
+
+       INIT_LIST_HEAD(&s->s_mounts);
+       init_rwsem(&s->s_umount);
+       lockdep_set_class(&s->s_umount, &type->s_umount_key);
+       /*
+        * sget() can have s_umount recursion.
+        *
+        * When it cannot find a suitable sb, it allocates a new
+        * one (this one), and tries again to find a suitable old
+        * one.
+        *
+        * In case that succeeds, it will acquire the s_umount
+        * lock of the old one. Since these are clearly distinct
+        * locks, and this object isn't exposed yet, there's no
+        * risk of deadlocks.
+        *
+        * Annotate this by putting this lock in a different
+        * subclass.
+        */
+       down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
+       s->s_count = 1;
+       atomic_set(&s->s_active, 1);
+       mutex_init(&s->s_vfs_rename_mutex);
+       lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
+       mutex_init(&s->s_dquot.dqio_mutex);
+       mutex_init(&s->s_dquot.dqonoff_mutex);
+       init_rwsem(&s->s_dquot.dqptr_sem);
+       s->s_maxbytes = MAX_NON_LFS;
+       s->s_op = &default_op;
+       s->s_time_gran = 1000000000;
+       s->cleancache_poolid = -1;
+
+       s->s_shrink.seeks = DEFAULT_SEEKS;
+       s->s_shrink.scan_objects = super_cache_scan;
+       s->s_shrink.count_objects = super_cache_count;
+       s->s_shrink.batch = 1024;
+       s->s_shrink.flags = SHRINKER_NUMA_AWARE;
         return s;
  
-err_out_dentry_lru:
-       list_lru_destroy(&s->s_dentry_lru);
-err_out:
-       security_sb_free(s);
-#ifdef CONFIG_SMP
-       if (s->s_files)
-               free_percpu(s->s_files);
-#endif
-       destroy_sb_writers(s);
-out_free_sb:
-       kfree(s);
-       s = NULL;
-       goto out;
-}
-
-/**
- *     destroy_super   -       frees a superblock
- *     @s: superblock to free
- *
- *     Frees a superblock.
- */
-static inline void destroy_super(struct super_block *s)
-{
-       list_lru_destroy(&s->s_dentry_lru);
-       list_lru_destroy(&s->s_inode_lru);
-#ifdef CONFIG_SMP
-       free_percpu(s->s_files);
-#endif
-       destroy_sb_writers(s);
-       security_sb_free(s);
-       WARN_ON(!list_empty(&s->s_mounts));
-       kfree(s->s_subtype);
-       kfree(s->s_options);
-       kfree(s);
+fail:
+       destroy_super(s);
+       return NULL;
  }
  
  /* Superblock refcounting  */
@@ -756,7 +710,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
            make sure there are no rw files opened */
         if (remount_ro) {
                 if (force) {
-                       mark_files_ro(sb);
+                       sb->s_readonly_remount = 1;
+                       smp_wmb();
                 } else {
                         retval = sb_prepare_remount_readonly(sb);
                         if (retval)
diff --git a/fs/sync.c b/fs/sync.c

index ff96f99..f155374 100644 (file)
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -180,7 +180,7 @@ SYSCALL_DEFINE1(syncfs, int, fd)
   */
  int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
  {
-       if (!file->f_op || !file->f_op->fsync)
+       if (!file->f_op->fsync)
                 return -EINVAL;
         return file->f_op->fsync(file, start, end, datasync);
  }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c

index 6e025e0..cc1febd 100644 (file)
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2563,9 +2563,9 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
         unsigned int from, to, ffs = chance(1, 2);
         unsigned char *p = (void *)buf;
  
-       from = prandom_u32() % (len + 1);
-       /* Corruption may only span one max. write unit */
-       to = min(len, ALIGN(from, c->max_write_size));
+       from = prandom_u32() % len;
+       /* Corruption spans at most to the end of the write unit */
+       to = min(len, ALIGN(from + 1, c->max_write_size));
  
         ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
                    ffs ? "0xFFs" : "random data");
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c

index 6b4947f..ea41649 100644 (file)
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -192,8 +192,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
         struct ubifs_dent_node *dent;
         struct ubifs_info *c = dir->i_sb->s_fs_info;
  
-       dbg_gen("'%.*s' in dir ino %lu",
-               dentry->d_name.len, dentry->d_name.name, dir->i_ino);
+       dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
  
         if (dentry->d_name.len > UBIFS_MAX_NLEN)
                 return ERR_PTR(-ENAMETOOLONG);
@@ -225,8 +224,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
                  * checking.
                  */
                 err = PTR_ERR(inode);
-               ubifs_err("dead directory entry '%.*s', error %d",
-                         dentry->d_name.len, dentry->d_name.name, err);
+               ubifs_err("dead directory entry '%pd', error %d",
+                         dentry, err);
                 ubifs_ro_mode(c, err);
                 goto out;
         }
@@ -260,8 +259,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
          * parent directory inode.
          */
  
-       dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
-               dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+       dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+               dentry, mode, dir->i_ino);
  
         err = ubifs_budget_space(c, &req);
         if (err)
@@ -509,8 +508,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
          * changing the parent inode.
          */
  
-       dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
-               dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+       dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
+               dentry, inode->i_ino,
                 inode->i_nlink, dir->i_ino);
         ubifs_assert(mutex_is_locked(&dir->i_mutex));
         ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -566,8 +565,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
          * deletions.
          */
  
-       dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
-               dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+       dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
+               dentry, inode->i_ino,
                 inode->i_nlink, dir->i_ino);
         ubifs_assert(mutex_is_locked(&dir->i_mutex));
         ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -656,8 +655,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
          * because we have extra space reserved for deletions.
          */
  
-       dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
-               dentry->d_name.name, inode->i_ino, dir->i_ino);
+       dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
+               inode->i_ino, dir->i_ino);
         ubifs_assert(mutex_is_locked(&dir->i_mutex));
         ubifs_assert(mutex_is_locked(&inode->i_mutex));
         err = check_dir_empty(c, dentry->d_inode);
@@ -716,8 +715,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
          * directory inode.
          */
  
-       dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
-               dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+       dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+               dentry, mode, dir->i_ino);
  
         err = ubifs_budget_space(c, &req);
         if (err)
@@ -778,8 +777,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
          * directory inode.
          */
  
-       dbg_gen("dent '%.*s' in dir ino %lu",
-               dentry->d_name.len, dentry->d_name.name, dir->i_ino);
+       dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino);
  
         if (!new_valid_dev(rdev))
                 return -EINVAL;
@@ -853,8 +851,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
          * directory inode.
          */
  
-       dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
-               dentry->d_name.name, symname, dir->i_ino);
+       dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
+               symname, dir->i_ino);
  
         if (len > UBIFS_MAX_INO_DATA)
                 return -ENAMETOOLONG;
@@ -979,10 +977,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
          * separately.
          */
  
-       dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu",
-               old_dentry->d_name.len, old_dentry->d_name.name,
-               old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
-               new_dentry->d_name.name, new_dir->i_ino);
+       dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
+               old_dentry, old_inode->i_ino, old_dir->i_ino,
+               new_dentry, new_dir->i_ino);
         ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
         ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
         if (unlink)
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c

index 76ca53c..9718da8 100644 (file)
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -668,8 +668,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
         ubifs_assert(!wbuf->used);
  
         for (i = 0; ; i++) {
-               int space_before = c->leb_size - wbuf->offs - wbuf->used;
-               int space_after;
+               int space_before, space_after;
  
                 cond_resched();
  
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c

index afaad07..0e045e7 100644 (file)
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -933,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
         int move = (old_dir != new_dir);
         struct ubifs_inode *uninitialized_var(new_ui);
  
-       dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
-               old_dentry->d_name.len, old_dentry->d_name.name,
-               old_dir->i_ino, new_dentry->d_name.len,
-               new_dentry->d_name.name, new_dir->i_ino);
+       dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu",
+               old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino);
         ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
         ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
         ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c

index 3e4aa72..f69daa5 100644 (file)
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1630,8 +1630,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
         }
  
         c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
-       if (!c->write_reserve_buf)
+       if (!c->write_reserve_buf) {
+               err = -ENOMEM;
                 goto out;
+       }
  
         err = ubifs_lpt_init(c, 0, 1);
         if (err)
@@ -2064,8 +2066,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
         }
  
         sb->s_root = d_make_root(root);
-       if (!sb->s_root)
+       if (!sb->s_root) {
+               err = -ENOMEM;
                 goto out_umount;
+       }
  
         mutex_unlock(&c->umount_mutex);
         return 0;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c

index 0f7139b..5e0a63b 100644 (file)
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -303,8 +303,8 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
         union ubifs_key key;
         int err, type;
  
-       dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
-               host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+       dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name,
+               host->i_ino, dentry, size);
         ubifs_assert(mutex_is_locked(&host->i_mutex));
  
         if (size > UBIFS_MAX_INO_DATA)
@@ -367,8 +367,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
         union ubifs_key key;
         int err;
  
-       dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name,
-               host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+       dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
+               host->i_ino, dentry, size);
  
         err = check_namespace(&nm);
         if (err < 0)
@@ -426,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
         int err, len, written = 0;
         struct qstr nm = { .name = NULL };
  
-       dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino,
-               dentry->d_name.len, dentry->d_name.name, size);
+       dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino,
+               dentry, size);
  
         len = host_ui->xattr_names + host_ui->xattr_cnt;
         if (!buffer)
@@ -529,8 +529,8 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
         union ubifs_key key;
         int err;
  
-       dbg_gen("xattr '%s', ino %lu ('%.*s')", name,
-               host->i_ino, dentry->d_name.len, dentry->d_name.name);
+       dbg_gen("xattr '%s', ino %lu ('%pd')", name,
+               host->i_ino, dentry);
         ubifs_assert(mutex_is_locked(&host->i_mutex));
  
         err = check_namespace(&nm);
diff --git a/fs/udf/super.c b/fs/udf/super.c

index 9121938..3306b9f 100644 (file)
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -76,6 +76,9 @@
  
  #define UDF_DEFAULT_BLOCKSIZE 2048
  
+#define VSD_FIRST_SECTOR_OFFSET                32768
+#define VSD_MAX_SECTOR_OFFSET          0x800000
+
  enum { UDF_MAX_LINKS = 0xffff };
  
  /* These are the "meat" - everything else is stuffing */
@@ -685,7 +688,7 @@ out_unlock:
  static loff_t udf_check_vsd(struct super_block *sb)
  {
         struct volStructDesc *vsd = NULL;
-       loff_t sector = 32768;
+       loff_t sector = VSD_FIRST_SECTOR_OFFSET;
         int sectorsize;
         struct buffer_head *bh = NULL;
         int nsr02 = 0;
@@ -703,8 +706,18 @@ static loff_t udf_check_vsd(struct super_block *sb)
         udf_debug("Starting at sector %u (%ld byte sectors)\n",
                   (unsigned int)(sector >> sb->s_blocksize_bits),
                   sb->s_blocksize);
-       /* Process the sequence (if applicable) */
-       for (; !nsr02 && !nsr03; sector += sectorsize) {
+       /* Process the sequence (if applicable). The hard limit on the sector
+        * offset is arbitrary, hopefully large enough so that all valid UDF
+        * filesystems will be recognised. There is no mention of an upper
+        * bound to the size of the volume recognition area in the standard.
+        * The limit prevents the code from reading all the sectors of a
+        * specially crafted image (like a bluray disc full of CD001 sectors),
+        * potentially causing minutes or even hours of uninterruptible I/O
+        * activity. This actually happened with uninitialised SSD partitions
+        * (all 0xFF) before the check for the limit and all valid IDs were
+        * added */
+       for (; !nsr02 && !nsr03 && sector < VSD_MAX_SECTOR_OFFSET;
+            sector += sectorsize) {
                 /* Read a block */
                 bh = udf_tread(sb, sector >> sb->s_blocksize_bits);
                 if (!bh)
@@ -714,10 +727,7 @@ static loff_t udf_check_vsd(struct super_block *sb)
                 vsd = (struct volStructDesc *)(bh->b_data +
                                               (sector & (sb->s_blocksize - 1)));
  
-               if (vsd->stdIdent[0] == 0) {
-                       brelse(bh);
-                       break;
-               } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
+               if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001,
                                     VSD_STD_ID_LEN)) {
                         switch (vsd->structType) {
                         case 0:
@@ -753,6 +763,17 @@ static loff_t udf_check_vsd(struct super_block *sb)
                 else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03,
                                     VSD_STD_ID_LEN))
                         nsr03 = sector;
+               else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BOOT2,
+                                   VSD_STD_ID_LEN))
+                       ; /* nothing */
+               else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CDW02,
+                                   VSD_STD_ID_LEN))
+                       ; /* nothing */
+               else {
+                       /* invalid id : end of volume recognition area */
+                       brelse(bh);
+                       break;
+               }
                 brelse(bh);
         }
  
@@ -760,7 +781,8 @@ static loff_t udf_check_vsd(struct super_block *sb)
                 return nsr03;
         else if (nsr02)
                 return nsr02;
-       else if (sector - (sbi->s_session << sb->s_blocksize_bits) == 32768)
+       else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) ==
+                       VSD_FIRST_SECTOR_OFFSET)
                 return -1;
         else
                 return 0;
@@ -1270,6 +1292,9 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
          * PHYSICAL partitions are already set up
          */
         type1_idx = i;
+#ifdef UDFFS_DEBUG
+       map = NULL; /* suppress 'maybe used uninitialized' warning */
+#endif
         for (i = 0; i < sbi->s_partitions; i++) {
                 map = &sbi->s_partmaps[i];
  
@@ -1891,7 +1916,9 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
                         return 0;
                 }
                 if (nsr_off == -1)
-                       udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n");
+                       udf_debug("Failed to read sector at offset %d. "
+                                 "Assuming open disc. Skipping validity "
+                                 "check\n", VSD_FIRST_SECTOR_OFFSET);
                 if (!sbi->s_last_block)
                         sbi->s_last_block = udf_get_last_block(sb);
         } else {
diff --git a/fs/utimes.c b/fs/utimes.c

index f4fb7ec..aa138d6 100644 (file)
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -53,6 +53,7 @@ static int utimes_common(struct path *path, struct timespec *times)
         int error;
         struct iattr newattrs;
         struct inode *inode = path->dentry->d_inode;
+       struct inode *delegated_inode = NULL;
  
         error = mnt_want_write(path->mnt);
         if (error)
@@ -101,9 +102,15 @@ static int utimes_common(struct path *path, struct timespec *times)
                                 goto mnt_drop_write_and_out;
                 }
         }
+retry_deleg:
         mutex_lock(&inode->i_mutex);
-       error = notify_change(path->dentry, &newattrs);
+       error = notify_change(path->dentry, &newattrs, &delegated_inode);
         mutex_unlock(&inode->i_mutex);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
  
  mnt_drop_write_and_out:
         mnt_drop_write(path->mnt);
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h

index b685d3b..3d1a3af 100644 (file)
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -32,6 +32,6 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
  
  #endif
  
-extern int copy_siginfo_to_user(struct siginfo __user *to, struct siginfo *from);
+extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
  
  #endif
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h

index cf573c2..8013a45 100644 (file)
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -13,9 +13,6 @@ struct file_operations;
  struct file *anon_inode_getfile(const char *name,
                                 const struct file_operations *fops,
                                 void *priv, int flags);
-struct file *anon_inode_getfile_private(const char *name,
-                               const struct file_operations *fops,
-                               void *priv, int flags);
  int anon_inode_getfd(const char *name, const struct file_operations *fops,
                      void *priv, int flags);
  
diff --git a/include/linux/ata.h b/include/linux/ata.h

index bf4c69c..f2f4d8d 100644 (file)
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -219,6 +219,7 @@ enum {
         ATA_CMD_IDLE            = 0xE3, /* place in idle power mode */
         ATA_CMD_EDD             = 0x90, /* execute device diagnostic */
         ATA_CMD_DOWNLOAD_MICRO  = 0x92,
+       ATA_CMD_DOWNLOAD_MICRO_DMA = 0x93,
         ATA_CMD_NOP             = 0x00,
         ATA_CMD_FLUSH           = 0xE7,
         ATA_CMD_FLUSH_EXT       = 0xEA,
@@ -268,12 +269,15 @@ enum {
         ATA_CMD_WRITE_LOG_EXT   = 0x3F,
         ATA_CMD_READ_LOG_DMA_EXT = 0x47,
         ATA_CMD_WRITE_LOG_DMA_EXT = 0x57,
+       ATA_CMD_TRUSTED_NONDATA = 0x5B,
         ATA_CMD_TRUSTED_RCV     = 0x5C,
         ATA_CMD_TRUSTED_RCV_DMA = 0x5D,
         ATA_CMD_TRUSTED_SND     = 0x5E,
         ATA_CMD_TRUSTED_SND_DMA = 0x5F,
         ATA_CMD_PMP_READ        = 0xE4,
+       ATA_CMD_PMP_READ_DMA    = 0xE9,
         ATA_CMD_PMP_WRITE       = 0xE8,
+       ATA_CMD_PMP_WRITE_DMA   = 0xEB,
         ATA_CMD_CONF_OVERLAY    = 0xB1,
         ATA_CMD_SEC_SET_PASS    = 0xF1,
         ATA_CMD_SEC_UNLOCK      = 0xF2,
@@ -292,6 +296,9 @@ enum {
         ATA_CMD_CFA_TRANS_SECT  = 0x87,
         ATA_CMD_CFA_ERASE       = 0xC0,
         ATA_CMD_CFA_WRITE_MULT_NE = 0xCD,
+       ATA_CMD_REQ_SENSE_DATA  = 0x0B,
+       ATA_CMD_SANITIZE_DEVICE = 0xB4,
+
         /* marked obsolete in the ATA/ATAPI-7 spec */
         ATA_CMD_RESTORE         = 0x10,
  
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h

index 7554fd4..fd8bf32 100644 (file)
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -56,11 +56,12 @@ struct linux_binprm {
  
  /* Function parameter for binfmt->coredump */
  struct coredump_params {
-       siginfo_t *siginfo;
+       const siginfo_t *siginfo;
         struct pt_regs *regs;
         struct file *file;
         unsigned long limit;
         unsigned long mm_flags;
+       loff_t written;
  };
  
  /*
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index 3561d30..39c1d94 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -612,11 +612,6 @@ struct cgroup_subsys {
         int subsys_id;
         int disabled;
         int early_init;
-       /*
-        * True if this subsys uses ID. ID is not available before cgroup_init()
-        * (not available in early_init time.)
-        */
-       bool use_id;
  
         /*
          * If %false, this subsystem is properly hierarchical -
@@ -642,9 +637,6 @@ struct cgroup_subsys {
          */
         struct cgroupfs_root *root;
         struct list_head sibling;
-       /* used when use_id == true */
-       struct idr idr;
-       spinlock_t id_lock;
  
         /* list of cftype_sets */
         struct list_head cftsets;
@@ -875,35 +867,6 @@ int css_scan_tasks(struct cgroup_subsys_state *css,
  int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
  int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
  
-/*
- * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
- * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
- * CSS ID is assigned at cgroup allocation (create) automatically
- * and removed when subsys calls free_css_id() function. This is because
- * the lifetime of cgroup_subsys_state is subsys's matter.
- *
- * Looking up and scanning function should be called under rcu_read_lock().
- * Taking cgroup_mutex is not necessary for following calls.
- * But the css returned by this routine can be "not populated yet" or "being
- * destroyed". The caller should check css and cgroup's status.
- */
-
-/*
- * Typically Called at ->destroy(), or somewhere the subsys frees
- * cgroup_subsys_state.
- */
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
-
-/* Find a cgroup_subsys_state which has given ID */
-
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
-
-/* Returns true if root is ancestor of cg */
-bool css_is_ancestor(struct cgroup_subsys_state *cg,
-                    const struct cgroup_subsys_state *root);
-
-/* Get id and depth of css */
-unsigned short css_id(struct cgroup_subsys_state *css);
  struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
                                          struct cgroup_subsys *ss);
  
diff --git a/include/linux/compat.h b/include/linux/compat.h

index ada34c9..eb8a49d 100644 (file)
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -362,7 +362,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
  long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
                        unsigned long bitmap_size);
  int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from);
  int get_compat_sigevent(struct sigevent *event,
                 const struct compat_sigevent __user *u_event);
  long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
diff --git a/include/linux/coredump.h b/include/linux/coredump.h

index a98f1ca..d016a12 100644 (file)
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -10,12 +10,14 @@
   * These are the only things you should do on a core-file: use only these
   * functions to write out all the necessary info.
   */
-extern int dump_write(struct file *file, const void *addr, int nr);
-extern int dump_seek(struct file *file, loff_t off);
+struct coredump_params;
+extern int dump_skip(struct coredump_params *cprm, size_t nr);
+extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
+extern int dump_align(struct coredump_params *cprm, int align);
  #ifdef CONFIG_COREDUMP
-extern void do_coredump(siginfo_t *siginfo);
+extern void do_coredump(const siginfo_t *siginfo);
  #else
-static inline void do_coredump(siginfo_t *siginfo) {}
+static inline void do_coredump(const siginfo_t *siginfo) {}
  #endif
  
  #endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h

index 59066e0..57e87e7 100644 (file)
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -169,13 +169,13 @@ struct dentry_operations {
   */
  
  /* d_flags entries */
-#define DCACHE_OP_HASH         0x0001
-#define DCACHE_OP_COMPARE      0x0002
-#define DCACHE_OP_REVALIDATE   0x0004
-#define DCACHE_OP_DELETE       0x0008
-#define DCACHE_OP_PRUNE         0x0010
+#define DCACHE_OP_HASH                 0x00000001
+#define DCACHE_OP_COMPARE              0x00000002
+#define DCACHE_OP_REVALIDATE           0x00000004
+#define DCACHE_OP_DELETE               0x00000008
+#define DCACHE_OP_PRUNE                        0x00000010
  
-#define        DCACHE_DISCONNECTED     0x0020
+#define        DCACHE_DISCONNECTED             0x00000020
       /* This dentry is possibly not currently connected to the dcache tree, in
        * which case its parent will either be itself, or will have this flag as
        * well.  nfsd will not use a dentry with this bit set, but will first
@@ -186,30 +186,38 @@ struct dentry_operations {
        * dentry into place and return that dentry rather than the passed one,
        * typically using d_splice_alias. */
  
-#define DCACHE_REFERENCED      0x0040  /* Recently used, don't discard. */
-#define DCACHE_RCUACCESS       0x0080  /* Entry has ever been RCU-visible */
+#define DCACHE_REFERENCED              0x00000040 /* Recently used, don't discard. */
+#define DCACHE_RCUACCESS               0x00000080 /* Entry has ever been RCU-visible */
  
-#define DCACHE_CANT_MOUNT      0x0100
-#define DCACHE_GENOCIDE                0x0200
-#define DCACHE_SHRINK_LIST     0x0400
+#define DCACHE_CANT_MOUNT              0x00000100
+#define DCACHE_GENOCIDE                        0x00000200
+#define DCACHE_SHRINK_LIST             0x00000400
  
-#define DCACHE_OP_WEAK_REVALIDATE      0x0800
+#define DCACHE_OP_WEAK_REVALIDATE      0x00000800
  
-#define DCACHE_NFSFS_RENAMED   0x1000
+#define DCACHE_NFSFS_RENAMED           0x00001000
       /* this dentry has been "silly renamed" and has to be deleted on the last
        * dput() */
-#define DCACHE_COOKIE          0x2000  /* For use by dcookie subsystem */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000
+#define DCACHE_COOKIE                  0x00002000 /* For use by dcookie subsystem */
+#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x00004000
       /* Parent inode is watched by some fsnotify listener */
  
-#define DCACHE_MOUNTED         0x10000 /* is a mountpoint */
-#define DCACHE_NEED_AUTOMOUNT  0x20000 /* handle automount on this dir */
-#define DCACHE_MANAGE_TRANSIT  0x40000 /* manage transit from this dirent */
+#define DCACHE_DENTRY_KILLED           0x00008000
+
+#define DCACHE_MOUNTED                 0x00010000 /* is a mountpoint */
+#define DCACHE_NEED_AUTOMOUNT          0x00020000 /* handle automount on this dir */
+#define DCACHE_MANAGE_TRANSIT          0x00040000 /* manage transit from this dirent */
  #define DCACHE_MANAGED_DENTRY \
         (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
  
-#define DCACHE_LRU_LIST                0x80000
-#define DCACHE_DENTRY_KILLED   0x100000
+#define DCACHE_LRU_LIST                        0x00080000
+
+#define DCACHE_ENTRY_TYPE              0x00700000
+#define DCACHE_MISS_TYPE               0x00000000 /* Negative dentry */
+#define DCACHE_DIRECTORY_TYPE          0x00100000 /* Normal directory */
+#define DCACHE_AUTODIR_TYPE            0x00200000 /* Lookupless directory (presumed automount) */
+#define DCACHE_SYMLINK_TYPE            0x00300000 /* Symlink */
+#define DCACHE_FILE_TYPE               0x00400000 /* Other file type */
  
  extern seqlock_t rename_lock;
  
@@ -224,6 +232,7 @@ static inline int dname_external(const struct dentry *dentry)
  extern void d_instantiate(struct dentry *, struct inode *);
  extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
  extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
+extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
  extern void __d_drop(struct dentry *dentry);
  extern void d_drop(struct dentry *dentry);
  extern void d_delete(struct dentry *);
@@ -393,6 +402,61 @@ static inline bool d_mountpoint(const struct dentry *dentry)
         return dentry->d_flags & DCACHE_MOUNTED;
  }
  
+/*
+ * Directory cache entry type accessor functions.
+ */
+static inline void __d_set_type(struct dentry *dentry, unsigned type)
+{
+       dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type;
+}
+
+static inline void __d_clear_type(struct dentry *dentry)
+{
+       __d_set_type(dentry, DCACHE_MISS_TYPE);
+}
+
+static inline void d_set_type(struct dentry *dentry, unsigned type)
+{
+       spin_lock(&dentry->d_lock);
+       __d_set_type(dentry, type);
+       spin_unlock(&dentry->d_lock);
+}
+
+static inline unsigned __d_entry_type(const struct dentry *dentry)
+{
+       return dentry->d_flags & DCACHE_ENTRY_TYPE;
+}
+
+static inline bool d_is_directory(const struct dentry *dentry)
+{
+       return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE;
+}
+
+static inline bool d_is_autodir(const struct dentry *dentry)
+{
+       return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE;
+}
+
+static inline bool d_is_symlink(const struct dentry *dentry)
+{
+       return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE;
+}
+
+static inline bool d_is_file(const struct dentry *dentry)
+{
+       return __d_entry_type(dentry) == DCACHE_FILE_TYPE;
+}
+
+static inline bool d_is_negative(const struct dentry *dentry)
+{
+       return __d_entry_type(dentry) == DCACHE_MISS_TYPE;
+}
+
+static inline bool d_is_positive(const struct dentry *dentry)
+{
+       return !d_is_negative(dentry);
+}
+
  extern int sysctl_vfs_cache_pressure;
  
  static inline unsigned long vfs_pressure_ratio(unsigned long val)
diff --git a/include/linux/elf.h b/include/linux/elf.h

index 40a3c0e..67a5fa7 100644 (file)
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -39,13 +39,13 @@ extern Elf64_Dyn _DYNAMIC [];
  
  /* Optional callbacks to write extra ELF notes. */
  struct file;
+struct coredump_params;
  
  #ifndef ARCH_HAVE_EXTRA_ELF_NOTES
  static inline int elf_coredump_extra_notes_size(void) { return 0; }
-static inline int elf_coredump_extra_notes_write(struct file *file,
-                       loff_t *foffset) { return 0; }
+static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; }
  #else
  extern int elf_coredump_extra_notes_size(void);
-extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int elf_coredump_extra_notes_write(struct coredump_params *cprm);
  #endif
  #endif /* _LINUX_ELF_H */
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h

index cdd3d13..698d51a 100644 (file)
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -6,6 +6,8 @@
  #include <asm/elf.h>
  #include <uapi/linux/elfcore.h>
  
+struct coredump_params;
+
  static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
  {
  #ifdef ELF_CORE_COPY_REGS
@@ -63,10 +65,9 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse
   */
  extern Elf_Half elf_core_extra_phdrs(void);
  extern int
-elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-                          unsigned long limit);
+elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
  extern int
-elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
+elf_core_write_extra_data(struct coredump_params *cprm);
  extern size_t elf_core_extra_data_size(void);
  
  #endif /* _LINUX_ELFCORE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 955dff5..bf5d574 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,10 +623,13 @@ static inline int inode_unhashed(struct inode *inode)
   * 0: the object of the current VFS operation
   * 1: parent
   * 2: child/target
- * 3: quota file
+ * 3: xattr
+ * 4: second non-directory
+ * The last is for certain operations (such as rename) which lock two
+ * non-directories at once.
   *
   * The locking order between these classes is
- * parent -> child -> normal -> xattr -> quota
+ * parent -> child -> normal -> xattr -> second non-directory
   */
  enum inode_i_mutex_lock_class
  {
@@ -634,9 +637,12 @@ enum inode_i_mutex_lock_class
         I_MUTEX_PARENT,
         I_MUTEX_CHILD,
         I_MUTEX_XATTR,
-       I_MUTEX_QUOTA
+       I_MUTEX_NONDIR2
  };
  
+void lock_two_nondirectories(struct inode *, struct inode*);
+void unlock_two_nondirectories(struct inode *, struct inode*);
+
  /*
   * NOTE: in a 32bit arch with a preemptable kernel and
   * an UP compile the i_size_read/write must be atomic
@@ -764,12 +770,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
  #define FILE_MNT_WRITE_RELEASED        2
  
  struct file {
-       /*
-        * fu_list becomes invalid after file_free is called and queued via
-        * fu_rcuhead for RCU freeing
-        */
         union {
-               struct list_head        fu_list;
                 struct llist_node       fu_llist;
                 struct rcu_head         fu_rcuhead;
         } f_u;
@@ -783,9 +784,6 @@ struct file {
          * Must not be taken from IRQ context.
          */
         spinlock_t              f_lock;
-#ifdef CONFIG_SMP
-       int                     f_sb_list_cpu;
-#endif
         atomic_long_t           f_count;
         unsigned int            f_flags;
         fmode_t                 f_mode;
@@ -882,6 +880,7 @@ static inline int file_check_writeable(struct file *filp)
  
  #define FL_POSIX       1
  #define FL_FLOCK       2
+#define FL_DELEG       4       /* NFSv4 delegation */
  #define FL_ACCESS      8       /* not trying to lock, just looking */
  #define FL_EXISTS      16      /* when unlocking, test for existence */
  #define FL_LEASE       32      /* lease held on this file */
@@ -1023,7 +1022,7 @@ extern int vfs_test_lock(struct file *, struct file_lock *);
  extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
  extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
  extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
-extern int __break_lease(struct inode *inode, unsigned int flags);
+extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
  extern void lease_get_mtime(struct inode *, struct timespec *time);
  extern int generic_setlease(struct file *, long, struct file_lock **);
  extern int vfs_setlease(struct file *, long, struct file_lock **);
@@ -1132,7 +1131,7 @@ static inline int flock_lock_file_wait(struct file *filp,
         return -ENOLCK;
  }
  
-static inline int __break_lease(struct inode *inode, unsigned int mode)
+static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
  {
         return 0;
  }
@@ -1264,11 +1263,6 @@ struct super_block {
  
         struct list_head        s_inodes;       /* all inodes */
         struct hlist_bl_head    s_anon;         /* anonymous dentries for (nfs) exporting */
-#ifdef CONFIG_SMP
-       struct list_head __percpu *s_files;
-#else
-       struct list_head        s_files;
-#endif
         struct list_head        s_mounts;       /* list of mounts; _not_ for fs use */
         struct block_device     *s_bdev;
         struct backing_dev_info *s_bdi;
@@ -1330,6 +1324,7 @@ struct super_block {
          */
         struct list_lru         s_dentry_lru ____cacheline_aligned_in_smp;
         struct list_lru         s_inode_lru ____cacheline_aligned_in_smp;
+       struct rcu_head         rcu;
  };
  
  extern struct timespec current_fs_time(struct super_block *sb);
@@ -1458,10 +1453,10 @@ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
  extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
  extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
  extern int vfs_symlink(struct inode *, struct dentry *, const char *);
-extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
+extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
  extern int vfs_rmdir(struct inode *, struct dentry *);
-extern int vfs_unlink(struct inode *, struct dentry *);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);
  
  /*
   * VFS dentry helper functions.
@@ -1875,6 +1870,17 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *,
         (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
  #define fops_put(fops) \
         do { if (fops) module_put((fops)->owner); } while(0)
+/*
+ * This one is to be used *ONLY* from ->open() instances.
+ * fops must be non-NULL, pinned down *and* module dependencies
+ * should be sufficient to pin the caller down as well.
+ */
+#define replace_fops(f, fops) \
+       do {    \
+               struct file *__file = (f); \
+               fops_put(__file->f_op); \
+               BUG_ON(!(__file->f_op = (fops))); \
+       } while(0)
  
  extern int register_filesystem(struct file_system_type *);
  extern int unregister_filesystem(struct file_system_type *);
@@ -1899,6 +1905,9 @@ extern bool fs_fully_visible(struct file_system_type *);
  
  extern int current_umask(void);
  
+extern void ihold(struct inode * inode);
+extern void iput(struct inode *);
+
  /* /sys/fs */
  extern struct kobject *fs_kobj;
  
@@ -1955,9 +1964,39 @@ static inline int locks_verify_truncate(struct inode *inode,
  static inline int break_lease(struct inode *inode, unsigned int mode)
  {
         if (inode->i_flock)
-               return __break_lease(inode, mode);
+               return __break_lease(inode, mode, FL_LEASE);
+       return 0;
+}
+
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+       if (inode->i_flock)
+               return __break_lease(inode, mode, FL_DELEG);
         return 0;
  }
+
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+       int ret;
+
+       ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
+       if (ret == -EWOULDBLOCK && delegated_inode) {
+               *delegated_inode = inode;
+               ihold(inode);
+       }
+       return ret;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+       int ret;
+
+       ret = break_deleg(*delegated_inode, O_WRONLY);
+       iput(*delegated_inode);
+       *delegated_inode = NULL;
+       return ret;
+}
+
  #else /* !CONFIG_FILE_LOCKING */
  static inline int locks_mandatory_locked(struct inode *inode)
  {
@@ -1997,6 +2036,22 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
         return 0;
  }
  
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+       return 0;
+}
+
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+       return 0;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+       BUG();
+       return 0;
+}
+
  #endif /* CONFIG_FILE_LOCKING */
  
  /* fs/open.c */
@@ -2223,7 +2278,7 @@ extern void emergency_remount(void);
  #ifdef CONFIG_BLOCK
  extern sector_t bmap(struct inode *, sector_t);
  #endif
-extern int notify_change(struct dentry *, struct iattr *);
+extern int notify_change(struct dentry *, struct iattr *, struct inode **);
  extern int inode_permission(struct inode *, int);
  extern int generic_permission(struct inode *, int);
  
@@ -2337,8 +2392,6 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
  extern int inode_init_always(struct super_block *, struct inode *);
  extern void inode_init_once(struct inode *);
  extern void address_space_init_once(struct address_space *mapping);
-extern void ihold(struct inode * inode);
-extern void iput(struct inode *);
  extern struct inode * igrab(struct inode *);
  extern ino_t iunique(struct super_block *, ino_t);
  extern int inode_needs_sync(struct inode *inode);
@@ -2507,8 +2560,10 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len,
                 int nofs);
  extern int page_symlink(struct inode *inode, const char *symname, int len);
  extern const struct inode_operations page_symlink_inode_operations;
+extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
  extern int generic_readlink(struct dentry *, char __user *, int);
  extern void generic_fillattr(struct inode *, struct kstat *);
+int vfs_getattr_nosec(struct path *path, struct kstat *stat);
  extern int vfs_getattr(struct path *, struct kstat *);
  void __inode_add_bytes(struct inode *inode, loff_t bytes);
  void inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -2567,6 +2622,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
  extern int simple_write_end(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned copied,
                         struct page *page, void *fsdata);
+extern struct inode *alloc_anon_inode(struct super_block *);
  
  extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
  extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/lglock.h b/include/linux/lglock.h

index 0d24e93..96549ab 100644 (file)
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -25,16 +25,6 @@
  #include <linux/cpu.h>
  #include <linux/notifier.h>
  
-/* can make br locks by using local lock for read side, global lock for write */
-#define br_lock_init(name)     lg_lock_init(name, #name)
-#define br_read_lock(name)     lg_local_lock(name)
-#define br_read_unlock(name)   lg_local_unlock(name)
-#define br_write_lock(name)    lg_global_lock(name)
-#define br_write_unlock(name)  lg_global_unlock(name)
-
-#define DEFINE_BRLOCK(name)            DEFINE_LGLOCK(name)
-#define DEFINE_STATIC_BRLOCK(name)     DEFINE_STATIC_LGLOCK(name)
-
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  #define LOCKDEP_INIT_MAP lockdep_init_map
  #else
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h

index cb35835..f7eaf2d 100644 (file)
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -31,6 +31,7 @@
  #define I2O_MINOR              166
  #define MICROCODE_MINOR                184
  #define TUN_MINOR              200
+#define CUSE_MINOR             203
  #define MWAVE_MINOR            219     /* ACP/Mwave Modem */
  #define MPT_MINOR              220
  #define MPT2SAS_MINOR          221
diff --git a/include/linux/mount.h b/include/linux/mount.h

index 38cd98f..371d346 100644 (file)
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -49,6 +49,8 @@ struct mnt_namespace;
  
  #define MNT_LOCK_READONLY      0x400000
  #define MNT_LOCKED             0x800000
+#define MNT_DOOMED             0x1000000
+#define MNT_SYNC_UMOUNT                0x2000000
  
  struct vfsmount {
         struct dentry *mnt_root;        /* root of the mounted tree */
diff --git a/include/linux/namei.h b/include/linux/namei.h

index 8e47bc7..492de72 100644 (file)
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -16,7 +16,7 @@ struct nameidata {
         struct path     root;
         struct inode    *inode; /* path.dentry.d_inode */
         unsigned int    flags;
-       unsigned        seq;
+       unsigned        seq, m_seq;
         int             last_type;
         unsigned        depth;
         char *saved_names[MAX_NESTED_LINKS + 1];
diff --git a/include/linux/percpu.h b/include/linux/percpu.h

index c74088a..9e4761c 100644 (file)
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -375,22 +375,6 @@ do {                                                                       \
  # define this_cpu_or(pcp, val)         __pcpu_size_call(this_cpu_or_, (pcp), (val))
  #endif
  
-#ifndef this_cpu_xor
-# ifndef this_cpu_xor_1
-#  define this_cpu_xor_1(pcp, val)     _this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef this_cpu_xor_2
-#  define this_cpu_xor_2(pcp, val)     _this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef this_cpu_xor_4
-#  define this_cpu_xor_4(pcp, val)     _this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef this_cpu_xor_8
-#  define this_cpu_xor_8(pcp, val)     _this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# define this_cpu_xor(pcp, val)                __pcpu_size_call(this_cpu_or_, (pcp), (val))
-#endif
-
  #define _this_cpu_generic_add_return(pcp, val)                         \
  ({                                                                     \
         typeof(pcp) ret__;                                              \
@@ -629,22 +613,6 @@ do {                                                                       \
  # define __this_cpu_or(pcp, val)       __pcpu_size_call(__this_cpu_or_, (pcp), (val))
  #endif
  
-#ifndef __this_cpu_xor
-# ifndef __this_cpu_xor_1
-#  define __this_cpu_xor_1(pcp, val)   __this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef __this_cpu_xor_2
-#  define __this_cpu_xor_2(pcp, val)   __this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef __this_cpu_xor_4
-#  define __this_cpu_xor_4(pcp, val)   __this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# ifndef __this_cpu_xor_8
-#  define __this_cpu_xor_8(pcp, val)   __this_cpu_generic_to_op((pcp), (val), ^=)
-# endif
-# define __this_cpu_xor(pcp, val)      __pcpu_size_call(__this_cpu_xor_, (pcp), (val))
-#endif
-
  #define __this_cpu_generic_add_return(pcp, val)                                \
  ({                                                                     \
         __this_cpu_add(pcp, val);                                       \
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h

index e277266..7246ef3 100644 (file)
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -23,6 +23,7 @@ struct bsd_acct_struct;
  struct pid_namespace {
         struct kref kref;
         struct pidmap pidmap[PIDMAP_ENTRIES];
+       struct rcu_head rcu;
         int last_pid;
         unsigned int nr_hashed;
         struct task_struct *child_reaper;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h

index 52ae548..e0dc355 100644 (file)
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -36,6 +36,11 @@
                 { CURSEG_COLD_NODE,     "Cold NODE" },                  \
                 { NO_CHECK_TYPE,        "No TYPE" })
  
+#define show_file_type(type)                                           \
+       __print_symbolic(type,                                          \
+               { 0,            "FILE" },                               \
+               { 1,            "DIR" })
+
  #define show_gc_type(type)                                             \
         __print_symbolic(type,                                          \
                 { FG_GC,        "Foreground GC" },                      \
@@ -623,6 +628,52 @@ TRACE_EVENT(f2fs_do_submit_bio,
                 __entry->size)
  );
  
+DECLARE_EVENT_CLASS(f2fs__page,
+
+       TP_PROTO(struct page *page, int type),
+
+       TP_ARGS(page, type),
+
+       TP_STRUCT__entry(
+               __field(dev_t,  dev)
+               __field(ino_t,  ino)
+               __field(int, type)
+               __field(int, dir)
+               __field(pgoff_t, index)
+               __field(int, dirty)
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = page->mapping->host->i_sb->s_dev;
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->type   = type;
+               __entry->dir    = S_ISDIR(page->mapping->host->i_mode);
+               __entry->index  = page->index;
+               __entry->dirty  = PageDirty(page);
+       ),
+
+       TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d",
+               show_dev_ino(__entry),
+               show_block_type(__entry->type),
+               show_file_type(__entry->dir),
+               (unsigned long)__entry->index,
+               __entry->dirty)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
+
+       TP_PROTO(struct page *page, int type),
+
+       TP_ARGS(page, type)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
+
+       TP_PROTO(struct page *page, int type),
+
+       TP_ARGS(page, type)
+);
+
  TRACE_EVENT(f2fs_submit_write_page,
  
         TP_PROTO(struct page *page, block_t blk_addr, int type),
diff --git a/ipc/mqueue.c b/ipc/mqueue.c

index ae1996d..95827ce 100644 (file)
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
                 err = -ENOENT;
         } else {
                 ihold(inode);
-               err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+               err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
         }
         dput(dentry);
  
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 8bd9cfd..e0839bc 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -124,38 +124,6 @@ struct cfent {
         struct simple_xattrs            xattrs;
  };
  
-/*
- * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
- * cgroup_subsys->use_id != 0.
- */
-#define CSS_ID_MAX     (65535)
-struct css_id {
-       /*
-        * The css to which this ID points. This pointer is set to valid value
-        * after cgroup is populated. If cgroup is removed, this will be NULL.
-        * This pointer is expected to be RCU-safe because destroy()
-        * is called after synchronize_rcu(). But for safe use, css_tryget()
-        * should be used for avoiding race.
-        */
-       struct cgroup_subsys_state __rcu *css;
-       /*
-        * ID of this css.
-        */
-       unsigned short id;
-       /*
-        * Depth in hierarchy which this ID belongs to.
-        */
-       unsigned short depth;
-       /*
-        * ID is freed by RCU. (and lookup routine is RCU safe.)
-        */
-       struct rcu_head rcu_head;
-       /*
-        * Hierarchy of CSS ID belongs to.
-        */
-       unsigned short stack[0]; /* Array of Length (depth+1) */
-};
-
  /*
   * cgroup_event represents events which userspace want to receive.
   */
@@ -387,9 +355,6 @@ struct cgrp_cset_link {
  static struct css_set init_css_set;
  static struct cgrp_cset_link init_cgrp_cset_link;
  
-static int cgroup_init_idr(struct cgroup_subsys *ss,
-                          struct cgroup_subsys_state *css);
-
  /*
   * css_set_lock protects the list of css_set objects, and the chain of
   * tasks off each css_set.  Nests outside task->alloc_lock due to
@@ -841,8 +806,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
         .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
  };
  
-static int alloc_css_id(struct cgroup_subsys_state *child_css);
-
  static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
  {
         struct inode *inode = new_inode(sb);
@@ -4240,21 +4203,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
                                 goto err;
                 }
         }
-
-       /* This cgroup is ready now */
-       for_each_root_subsys(cgrp->root, ss) {
-               struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
-               struct css_id *id = rcu_dereference_protected(css->id, true);
-
-               /*
-                * Update id->css pointer and make this css visible from
-                * CSS ID functions. This pointer will be dereferened
-                * from RCU-read-side without locks.
-                */
-               if (id)
-                       rcu_assign_pointer(id->css, css);
-       }
-
         return 0;
  err:
         cgroup_clear_dir(cgrp, subsys_mask);
@@ -4323,7 +4271,6 @@ static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
         css->cgroup = cgrp;
         css->ss = ss;
         css->flags = 0;
-       css->id = NULL;
  
         if (cgrp->parent)
                 css->parent = cgroup_css(cgrp->parent, ss);
@@ -4455,12 +4402,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                         goto err_free_all;
  
                 init_css(css, ss, cgrp);
-
-               if (ss->use_id) {
-                       err = alloc_css_id(css);
-                       if (err)
-                               goto err_free_all;
-               }
         }
  
         /*
@@ -4925,12 +4866,6 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
  
         /* our new subsystem will be attached to the dummy hierarchy. */
         init_css(css, ss, cgroup_dummy_top);
-       /* init_idr must be after init_css() because it sets css->id. */
-       if (ss->use_id) {
-               ret = cgroup_init_idr(ss, css);
-               if (ret)
-                       goto err_unload;
-       }
  
         /*
          * Now we need to entangle the css into the existing css_sets. unlike
@@ -4996,9 +4931,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
  
         offline_css(cgroup_css(cgroup_dummy_top, ss));
  
-       if (ss->use_id)
-               idr_destroy(&ss->idr);
-
         /* deassign the subsys_id */
         cgroup_subsys[ss->subsys_id] = NULL;
  
@@ -5025,8 +4957,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
         /*
          * remove subsystem's css from the cgroup_dummy_top and free it -
          * need to free before marking as null because ss->css_free needs
-        * the cgrp->subsys pointer to find their state. note that this
-        * also takes care of freeing the css_id.
+        * the cgrp->subsys pointer to find their state.
          */
         ss->css_free(cgroup_css(cgroup_dummy_top, ss));
         RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
@@ -5097,8 +5028,6 @@ int __init cgroup_init(void)
         for_each_builtin_subsys(ss, i) {
                 if (!ss->early_init)
                         cgroup_init_subsys(ss);
-               if (ss->use_id)
-                       cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
         }
  
         /* allocate id for the dummy hierarchy */
@@ -5518,181 +5447,6 @@ static int __init cgroup_disable(char *str)
  }
  __setup("cgroup_disable=", cgroup_disable);
  
-/*
- * Functons for CSS ID.
- */
-
-/* to get ID other than 0, this should be called when !cgroup_is_dead() */
-unsigned short css_id(struct cgroup_subsys_state *css)
-{
-       struct css_id *cssid;
-
-       /*
-        * This css_id() can return correct value when somone has refcnt
-        * on this or this is under rcu_read_lock(). Once css->id is allocated,
-        * it's unchanged until freed.
-        */
-       cssid = rcu_dereference_raw(css->id);
-
-       if (cssid)
-               return cssid->id;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(css_id);
-
-/**
- *  css_is_ancestor - test "root" css is an ancestor of "child"
- * @child: the css to be tested.
- * @root: the css supporsed to be an ancestor of the child.
- *
- * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
- * this function reads css->id, the caller must hold rcu_read_lock().
- * But, considering usual usage, the csses should be valid objects after test.
- * Assuming that the caller will do some action to the child if this returns
- * returns true, the caller must take "child";s reference count.
- * If "child" is valid object and this returns true, "root" is valid, too.
- */
-
-bool css_is_ancestor(struct cgroup_subsys_state *child,
-                   const struct cgroup_subsys_state *root)
-{
-       struct css_id *child_id;
-       struct css_id *root_id;
-
-       child_id  = rcu_dereference(child->id);
-       if (!child_id)
-               return false;
-       root_id = rcu_dereference(root->id);
-       if (!root_id)
-               return false;
-       if (child_id->depth < root_id->depth)
-               return false;
-       if (child_id->stack[root_id->depth] != root_id->id)
-               return false;
-       return true;
-}
-
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
-{
-       struct css_id *id = rcu_dereference_protected(css->id, true);
-
-       /* When this is called before css_id initialization, id can be NULL */
-       if (!id)
-               return;
-
-       BUG_ON(!ss->use_id);
-
-       rcu_assign_pointer(id->css, NULL);
-       rcu_assign_pointer(css->id, NULL);
-       spin_lock(&ss->id_lock);
-       idr_remove(&ss->idr, id->id);
-       spin_unlock(&ss->id_lock);
-       kfree_rcu(id, rcu_head);
-}
-EXPORT_SYMBOL_GPL(free_css_id);
-
-/*
- * This is called by init or create(). Then, calls to this function are
- * always serialized (By cgroup_mutex() at create()).
- */
-
-static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
-{
-       struct css_id *newid;
-       int ret, size;
-
-       BUG_ON(!ss->use_id);
-
-       size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
-       newid = kzalloc(size, GFP_KERNEL);
-       if (!newid)
-               return ERR_PTR(-ENOMEM);
-
-       idr_preload(GFP_KERNEL);
-       spin_lock(&ss->id_lock);
-       /* Don't use 0. allocates an ID of 1-65535 */
-       ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT);
-       spin_unlock(&ss->id_lock);
-       idr_preload_end();
-
-       /* Returns error when there are no free spaces for new ID.*/
-       if (ret < 0)
-               goto err_out;
-
-       newid->id = ret;
-       newid->depth = depth;
-       return newid;
-err_out:
-       kfree(newid);
-       return ERR_PTR(ret);
-
-}
-
-static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
-                                           struct cgroup_subsys_state *rootcss)
-{
-       struct css_id *newid;
-
-       spin_lock_init(&ss->id_lock);
-       idr_init(&ss->idr);
-
-       newid = get_new_cssid(ss, 0);
-       if (IS_ERR(newid))
-               return PTR_ERR(newid);
-
-       newid->stack[0] = newid->id;
-       RCU_INIT_POINTER(newid->css, rootcss);
-       RCU_INIT_POINTER(rootcss->id, newid);
-       return 0;
-}
-
-static int alloc_css_id(struct cgroup_subsys_state *child_css)
-{
-       struct cgroup_subsys_state *parent_css = css_parent(child_css);
-       struct css_id *child_id, *parent_id;
-       int i, depth;
-
-       parent_id = rcu_dereference_protected(parent_css->id, true);
-       depth = parent_id->depth + 1;
-
-       child_id = get_new_cssid(child_css->ss, depth);
-       if (IS_ERR(child_id))
-               return PTR_ERR(child_id);
-
-       for (i = 0; i < depth; i++)
-               child_id->stack[i] = parent_id->stack[i];
-       child_id->stack[depth] = child_id->id;
-       /*
-        * child_id->css pointer will be set after this cgroup is available
-        * see cgroup_populate_dir()
-        */
-       rcu_assign_pointer(child_css->id, child_id);
-
-       return 0;
-}
-
-/**
- * css_lookup - lookup css by id
- * @ss: cgroup subsys to be looked into.
- * @id: the id
- *
- * Returns pointer to cgroup_subsys_state if there is valid one with id.
- * NULL if not. Should be called under rcu_read_lock()
- */
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
-{
-       struct css_id *cssid = NULL;
-
-       BUG_ON(!ss->use_id);
-       cssid = idr_find(&ss->idr, id);
-
-       if (unlikely(!cssid))
-               return NULL;
-
-       return rcu_dereference(cssid->css);
-}
-EXPORT_SYMBOL_GPL(css_lookup);
-
  /**
   * css_from_dir - get corresponding css from the dentry of a cgroup dir
   * @dentry: directory dentry of interest
diff --git a/kernel/elfcore.c b/kernel/elfcore.c

index ff915ef..e556751 100644 (file)
--- a/kernel/elfcore.c
+++ b/kernel/elfcore.c
@@ -1,23 +1,19 @@
  #include <linux/elf.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
-
-#include <asm/elf.h>
-
+#include <linux/binfmts.h>
  
  Elf_Half __weak elf_core_extra_phdrs(void)
  {
         return 0;
  }
  
-int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-                                     unsigned long limit)
+int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
  {
         return 1;
  }
  
-int __weak elf_core_write_extra_data(struct file *file, size_t *size,
-                                    unsigned long limit)
+int __weak elf_core_write_extra_data(struct coredump_params *cprm)
  {
         return 1;
  }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c

index 4208655..06c62de 100644 (file)
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -132,6 +132,12 @@ out:
         return ERR_PTR(err);
  }
  
+static void delayed_free_pidns(struct rcu_head *p)
+{
+       kmem_cache_free(pid_ns_cachep,
+                       container_of(p, struct pid_namespace, rcu));
+}
+
  static void destroy_pid_namespace(struct pid_namespace *ns)
  {
         int i;
@@ -140,7 +146,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
         for (i = 0; i < PIDMAP_ENTRIES; i++)
                 kfree(ns->pidmap[i].page);
         put_user_ns(ns->user_ns);
-       kmem_cache_free(pid_ns_cachep, ns);
+       call_rcu(&ns->rcu, delayed_free_pidns);
  }
  
  struct pid_namespace *copy_pid_ns(unsigned long flags,
diff --git a/kernel/signal.c b/kernel/signal.c

index ded28b9..940b30e 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2723,7 +2723,7 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
  
  #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
  
-int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
  {
         int err;
  
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 3d4bb07..f20a57b 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -500,6 +500,29 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
         return (memcg == root_mem_cgroup);
  }
  
+/*
+ * We restrict the id in the range of [1, 65535], so it can fit into
+ * an unsigned short.
+ */
+#define MEM_CGROUP_ID_MAX      USHRT_MAX
+
+static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+{
+       /*
+        * The ID of the root cgroup is 0, but memcg treats 0 as an
+        * invalid ID, so we return (cgroup_id + 1).
+        */
+       return memcg->css.cgroup->id + 1;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+       struct cgroup_subsys_state *css;
+
+       css = css_from_id(id - 1, &mem_cgroup_subsys);
+       return mem_cgroup_from_css(css);
+}
+
  /* Writing them here to avoid exposing memcg's inner layout */
  #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
  
@@ -571,16 +594,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
  #ifdef CONFIG_MEMCG_KMEM
  /*
   * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
- * There are two main reasons for not using the css_id for this:
- *  1) this works better in sparse environments, where we have a lot of memcgs,
- *     but only a few kmem-limited. Or also, if we have, for instance, 200
- *     memcgs, and none but the 200th is kmem-limited, we'd have to have a
- *     200 entry array for that.
- *
- *  2) In order not to violate the cgroup API, we would like to do all memory
- *     allocation in ->create(). At that point, we haven't yet allocated the
- *     css_id. Having a separate index prevents us from messing with the cgroup
- *     core for this
+ * The main reason for not using cgroup id for this:
+ *  this works better in sparse environments, where we have a lot of memcgs,
+ *  but only a few kmem-limited. Or also, if we have, for instance, 200
+ *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *  200 entry array for that.
   *
   * The current size of the caches array is stored in
   * memcg_limited_groups_array_size.  It will double each time we have to
@@ -595,14 +613,14 @@ int memcg_limited_groups_array_size;
   * cgroups is a reasonable guess. In the future, it could be a parameter or
   * tunable, but that is strictly not necessary.
   *
- * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
   * this constant directly from cgroup, but it is understandable that this is
   * better kept as an internal representation in cgroup.c. In any case, the
- * css_id space is not getting any smaller, and we don't have to necessarily
+ * cgrp_id space is not getting any smaller, and we don't have to necessarily
   * increase ours as well if it increases.
   */
  #define MEMCG_CACHES_MIN_SIZE 4
-#define MEMCG_CACHES_MAX_SIZE 65535
+#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
  
  /*
   * A lot of the calls to the cache allocation functions are expected to be
@@ -1409,7 +1427,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
                 return true;
         if (!root_memcg->use_hierarchy || !memcg)
                 return false;
-       return css_is_ancestor(&memcg->css, &root_memcg->css);
+       return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
  }
  
  static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
@@ -2827,15 +2845,10 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
   */
  static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
  {
-       struct cgroup_subsys_state *css;
-
         /* ID 0 is unused ID */
         if (!id)
                 return NULL;
-       css = css_lookup(&mem_cgroup_subsys, id);
-       if (!css)
-               return NULL;
-       return mem_cgroup_from_css(css);
+       return mem_cgroup_from_id(id);
  }
  
  struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -4344,7 +4357,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
          * css_get() was called in uncharge().
          */
         if (do_swap_account && swapout && memcg)
-               swap_cgroup_record(ent, css_id(&memcg->css));
+               swap_cgroup_record(ent, mem_cgroup_id(memcg));
  }
  #endif
  
@@ -4396,8 +4409,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
  {
         unsigned short old_id, new_id;
  
-       old_id = css_id(&from->css);
-       new_id = css_id(&to->css);
+       old_id = mem_cgroup_id(from);
+       new_id = mem_cgroup_id(to);
  
         if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
                 mem_cgroup_swap_statistics(from, false);
@@ -6165,7 +6178,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
         size_t size = memcg_size();
  
         mem_cgroup_remove_from_trees(memcg);
-       free_css_id(&mem_cgroup_subsys, &memcg->css);
  
         for_each_node(node)
                 free_mem_cgroup_per_zone_info(memcg, node);
@@ -6268,6 +6280,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
         struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
         int error = 0;
  
+       if (css->cgroup->id >= MEM_CGROUP_ID_MAX)
+               return -ENOSPC; /* id + 1 must fit in unsigned short */
+
         if (!parent)
                 return 0;
  
@@ -6539,7 +6554,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
         }
         /* There is a swap entry and a page doesn't exist or isn't charged */
         if (ent.val && !ret &&
-                       css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
+           mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
                 ret = MC_TARGET_SWAP;
                 if (target)
                         target->ent = ent;
@@ -6959,7 +6974,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
         .bind = mem_cgroup_bind,
         .base_cftypes = mem_cgroup_files,
         .early_init = 0,
-       .use_id = 1,
  };
  
  #ifdef CONFIG_MEMCG_SWAP
diff --git a/mm/memory.c b/mm/memory.c

index 15744b2..bf86658 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -679,7 +679,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
         if (vma->vm_ops)
                 printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n",
                        vma->vm_ops->fault);
-       if (vma->vm_file && vma->vm_file->f_op)
+       if (vma->vm_file)
                 printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n",
                        vma->vm_file->f_op->mmap);
         dump_stack();
diff --git a/mm/mmap.c b/mm/mmap.c

index 803048e..5a6badd 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1297,7 +1297,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                                 vm_flags &= ~VM_MAYEXEC;
                         }
  
-                       if (!file->f_op || !file->f_op->mmap)
+                       if (!file->f_op->mmap)
                                 return -ENODEV;
                         if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
                                 return -EINVAL;
@@ -1949,7 +1949,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                 return -ENOMEM;
  
         get_area = current->mm->get_unmapped_area;
-       if (file && file->f_op && file->f_op->get_unmapped_area)
+       if (file && file->f_op->get_unmapped_area)
                 get_area = file->f_op->get_unmapped_area;
         addr = get_area(file, addr, len, pgoff, flags);
         if (IS_ERR_VALUE(addr))
diff --git a/mm/nommu.c b/mm/nommu.c

index d8a957b..fec093a 100644 (file)
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -937,7 +937,7 @@ static int validate_mmap_request(struct file *file,
                 struct address_space *mapping;
  
                 /* files must support mmap */
-               if (!file->f_op || !file->f_op->mmap)
+               if (!file->f_op->mmap)
                         return -ENODEV;
  
                 /* work out if what we've got could possibly be shared
diff --git a/mm/percpu.c b/mm/percpu.c

index 8c8e08f..0d10def 100644 (file)
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1706,8 +1706,9 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
  
  out_free_areas:
         for (group = 0; group < ai->nr_groups; group++)
-               free_fn(areas[group],
-                       ai->groups[group].nr_units * ai->unit_size);
+               if (areas[group])
+                       free_fn(areas[group],
+                               ai->groups[group].nr_units * ai->unit_size);
  out_free:
         pcpu_free_alloc_info(ai);
         if (areas)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c

index 3ffda1b..9321a77 100644 (file)
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -244,10 +244,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
         if (!ts)
                 return -EREMOTEIO;
  
-       if (!ts->rd->f_op || !ts->rd->f_op->poll)
+       if (!ts->rd->f_op->poll)
                 return -EIO;
  
-       if (!ts->wr->f_op || !ts->wr->f_op->poll)
+       if (!ts->wr->f_op->poll)
                 return -EIO;
  
         ret = ts->rd->f_op->poll(ts->rd, pt);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c

index f94567b..d0d14a0 100644 (file)
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -519,8 +519,8 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
         d_add(dentry, inode);
         return 0;
  out_err:
-       printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
-                       __FILE__, __func__, dentry->d_name.name);
+       printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n",
+                       __FILE__, __func__, dentry);
         dput(dentry);
         return -ENOMEM;
  }
@@ -755,8 +755,8 @@ static int rpc_populate(struct dentry *parent,
  out_bad:
         __rpc_depopulate(parent, files, start, eof);
         mutex_unlock(&dir->i_mutex);
-       printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
-                       __FILE__, __func__, parent->d_name.name);
+       printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
+                       __FILE__, __func__, parent);
         return err;
  }
  
@@ -852,8 +852,8 @@ out:
         return dentry;
  out_err:
         dentry = ERR_PTR(err);
-       printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
-                       __FILE__, __func__, parent->d_name.name, name,
+       printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n",
+                       __FILE__, __func__, parent, name,
                         err);
         goto out;
  }
diff --git a/security/device_cgroup.c b/security/device_cgroup.c

index c123628..7c2a0a7 100644 (file)
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -63,16 +63,6 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
  
  struct cgroup_subsys devices_subsys;
  
-static int devcgroup_can_attach(struct cgroup_subsys_state *new_css,
-                               struct cgroup_taskset *set)
-{
-       struct task_struct *task = cgroup_taskset_first(set);
-
-       if (current != task && !capable(CAP_SYS_ADMIN))
-               return -EPERM;
-       return 0;
-}
-
  /*
   * called under devcgroup_mutex
   */
@@ -697,7 +687,6 @@ static struct cftype dev_cgroup_files[] = {
  
  struct cgroup_subsys devices_subsys = {
         .name = "devices",
-       .can_attach = devcgroup_can_attach,
         .css_alloc = devcgroup_css_alloc,
         .css_free = devcgroup_css_free,
         .css_online = devcgroup_online,
diff --git a/sound/core/sound.c b/sound/core/sound.c

index f002bd9..437c25e 100644 (file)
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -153,7 +153,7 @@ static int snd_open(struct inode *inode, struct file *file)
  {
         unsigned int minor = iminor(inode);
         struct snd_minor *mptr = NULL;
-       const struct file_operations *old_fops;
+       const struct file_operations *new_fops;
         int err = 0;
  
         if (minor >= ARRAY_SIZE(snd_minors))
@@ -167,24 +167,14 @@ static int snd_open(struct inode *inode, struct file *file)
                         return -ENODEV;
                 }
         }
-       old_fops = file->f_op;
-       file->f_op = fops_get(mptr->f_ops);
-       if (file->f_op == NULL) {
-               file->f_op = old_fops;
-               err = -ENODEV;
-       }
+       new_fops = fops_get(mptr->f_ops);
         mutex_unlock(&sound_mutex);
-       if (err < 0)
-               return err;
+       if (!new_fops)
+               return -ENODEV;
+       replace_fops(file, new_fops);
  
-       if (file->f_op->open) {
+       if (file->f_op->open)
                 err = file->f_op->open(inode, file);
-               if (err) {
-                       fops_put(file->f_op);
-                       file->f_op = fops_get(old_fops);
-               }
-       }
-       fops_put(old_fops);
         return err;
  }
  
diff --git a/sound/sound_core.c b/sound/sound_core.c

index 45759f4..11e953a 100644 (file)
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -626,31 +626,20 @@ static int soundcore_open(struct inode *inode, struct file *file)
                 if (s)
                         new_fops = fops_get(s->unit_fops);
         }
+       spin_unlock(&sound_loader_lock);
         if (new_fops) {
                 /*
                  * We rely upon the fact that we can't be unloaded while the
-                * subdriver is there, so if ->open() is successful we can
-                * safely drop the reference counter and if it is not we can
-                * revert to old ->f_op. Ugly, indeed, but that's the cost of
-                * switching ->f_op in the first place.
+                * subdriver is there.
                  */
                 int err = 0;
-               const struct file_operations *old_fops = file->f_op;
-               file->f_op = new_fops;
-               spin_unlock(&sound_loader_lock);
+               replace_fops(file, new_fops);
  
                 if (file->f_op->open)
                         err = file->f_op->open(inode,file);
  
-               if (err) {
-                       fops_put(file->f_op);
-                       file->f_op = fops_get(old_fops);
-               }
-
-               fops_put(old_fops);
                 return err;
         }
-       spin_unlock(&sound_loader_lock);
         return -ENODEV;
  }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 13 Nov 2013 06:45:43 +0000 (15:45 +0900)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 13 Nov 2013 06:45:43 +0000 (15:45 +0900)
Documentation/devices.txt		patch \| blob \| history
Documentation/filesystems/directory-locking		patch \| blob \| history
Documentation/filesystems/f2fs.txt		patch \| blob \| history
Documentation/filesystems/porting		patch \| blob \| history
arch/arm64/kernel/signal32.c		patch \| blob \| history
arch/ia64/kernel/elfcore.c		patch \| blob \| history
arch/ia64/kernel/signal.c		patch \| blob \| history
arch/mips/kernel/signal32.c		patch \| blob \| history
arch/parisc/kernel/signal32.c		patch \| blob \| history
arch/parisc/kernel/signal32.h		patch \| blob \| history
arch/powerpc/include/asm/spu.h		patch \| blob \| history
arch/powerpc/kernel/signal_32.c		patch \| blob \| history
arch/powerpc/platforms/cell/spu_syscalls.c		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/coredump.c		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/spufs.h		patch \| blob \| history
arch/s390/kernel/compat_signal.c		patch \| blob \| history
arch/sparc/kernel/signal32.c		patch \| blob \| history
arch/tile/kernel/compat_signal.c		patch \| blob \| history
arch/x86/ia32/ia32_aout.c		patch \| blob \| history
arch/x86/ia32/ia32_signal.c		patch \| blob \| history
arch/x86/include/asm/percpu.h		patch \| blob \| history
arch/x86/um/elfcore.c		patch \| blob \| history
drivers/ata/ahci.c		patch \| blob \| history
drivers/ata/ahci.h		patch \| blob \| history
drivers/ata/ahci_imx.c		patch \| blob \| history
drivers/ata/ahci_platform.c		patch \| blob \| history
drivers/ata/ata_piix.c		patch \| blob \| history
drivers/ata/libahci.c		patch \| blob \| history
drivers/ata/libata-core.c		patch \| blob \| history
drivers/ata/libata-eh.c		patch \| blob \| history
drivers/ata/libata-transport.c		patch \| blob \| history
drivers/ata/sata_highbank.c		patch \| blob \| history
drivers/ata/sata_rcar.c		patch \| blob \| history
drivers/base/devtmpfs.c		patch \| blob \| history
drivers/char/misc.c		patch \| blob \| history
drivers/gpu/drm/drm_fops.c		patch \| blob \| history
drivers/media/dvb-core/dmxdev.c		patch \| blob \| history
drivers/media/dvb-core/dvbdev.c		patch \| blob \| history
drivers/mtd/nand/nandsim.c		patch \| blob \| history
drivers/mtd/ubi/attach.c		patch \| blob \| history
drivers/mtd/ubi/fastmap.c		patch \| blob \| history
drivers/mtd/ubi/wl.c		patch \| blob \| history
drivers/staging/comedi/comedi_compat32.c		patch \| blob \| history
drivers/staging/lustre/lustre/include/linux/lustre_compat25.h		patch \| blob \| history
drivers/staging/lustre/lustre/llite/namei.c		patch \| blob \| history
drivers/staging/lustre/lustre/lvfs/lvfs_linux.c		patch \| blob \| history
drivers/staging/rtl8188eu/include/osdep_service.h		patch \| blob \| history
drivers/staging/rtl8188eu/os_dep/osdep_service.c		patch \| blob \| history
drivers/usb/core/file.c		patch \| blob \| history
fs/9p/cache.h		patch \| blob \| history
fs/9p/vfs_file.c		patch \| blob \| history
fs/9p/vfs_inode.c		patch \| blob \| history
fs/9p/vfs_inode_dotl.c		patch \| blob \| history
fs/adfs/adfs.h		patch \| blob \| history
fs/adfs/super.c		patch \| blob \| history
fs/aio.c		patch \| blob \| history
fs/anon_inodes.c		patch \| blob \| history
fs/attr.c		patch \| blob \| history
fs/autofs4/autofs_i.h		patch \| blob \| history
fs/autofs4/dev-ioctl.c		patch \| blob \| history
fs/autofs4/inode.c		patch \| blob \| history
fs/befs/linuxvfs.c		patch \| blob \| history
fs/binfmt_aout.c		patch \| blob \| history
fs/binfmt_elf.c		patch \| blob \| history
fs/binfmt_elf_fdpic.c		patch \| blob \| history
fs/binfmt_em86.c		patch \| blob \| history
fs/cachefiles/interface.c		patch \| blob \| history
fs/cachefiles/namei.c		patch \| blob \| history
fs/char_dev.c		patch \| blob \| history
fs/cifs/cifs_fs_sb.h		patch \| blob \| history
fs/cifs/cifsfs.c		patch \| blob \| history
fs/cifs/cifsfs.h		patch \| blob \| history
fs/cifs/connect.c		patch \| blob \| history
fs/cifs/link.c		patch \| blob \| history
fs/coda/coda_linux.h		patch \| blob \| history
fs/coda/dir.c		patch \| blob \| history
fs/coda/file.c		patch \| blob \| history
fs/coda/inode.c		patch \| blob \| history
fs/compat_ioctl.c		patch \| blob \| history
fs/coredump.c		patch \| blob \| history
fs/dcache.c		patch \| blob \| history
fs/dlm/lockspace.c		patch \| blob \| history
fs/ecryptfs/dentry.c		patch \| blob \| history
fs/ecryptfs/ecryptfs_kernel.h		patch \| blob \| history
fs/ecryptfs/file.c		patch \| blob \| history
fs/ecryptfs/inode.c		patch \| blob \| history
fs/ecryptfs/main.c		patch \| blob \| history
fs/eventpoll.c		patch \| blob \| history
fs/exec.c		patch \| blob \| history
fs/exportfs/expfs.c		patch \| blob \| history
fs/ext2/inode.c		patch \| blob \| history
fs/ext2/xip.c		patch \| blob \| history
fs/ext3/super.c		patch \| blob \| history
fs/ext4/ext4.h		patch \| blob \| history
fs/ext4/ioctl.c		patch \| blob \| history
fs/ext4/move_extent.c		patch \| blob \| history
fs/f2fs/Kconfig		patch \| blob \| history
fs/f2fs/acl.c		patch \| blob \| history
fs/f2fs/acl.h		patch \| blob \| history
fs/f2fs/checkpoint.c		patch \| blob \| history
fs/f2fs/data.c		patch \| blob \| history
fs/f2fs/dir.c		patch \| blob \| history
fs/f2fs/f2fs.h		patch \| blob \| history
fs/f2fs/file.c		patch \| blob \| history
fs/f2fs/gc.c		patch \| blob \| history
fs/f2fs/inode.c		patch \| blob \| history
fs/f2fs/namei.c		patch \| blob \| history
fs/f2fs/node.c		patch \| blob \| history
fs/f2fs/recovery.c		patch \| blob \| history
fs/f2fs/segment.c		patch \| blob \| history
fs/f2fs/segment.h		patch \| blob \| history
fs/f2fs/super.c		patch \| blob \| history
fs/f2fs/xattr.c		patch \| blob \| history
fs/fat/fat.h		patch \| blob \| history
fs/fat/inode.c		patch \| blob \| history
fs/fcntl.c		patch \| blob \| history
fs/file_table.c		patch \| blob \| history
fs/fs-writeback.c		patch \| blob \| history
fs/fuse/cuse.c		patch \| blob \| history
fs/fuse/dir.c		patch \| blob \| history
fs/fuse/file.c		patch \| blob \| history
fs/fuse/fuse_i.h		patch \| blob \| history
fs/fuse/inode.c		patch \| blob \| history
fs/gfs2/inode.c		patch \| blob \| history
fs/hpfs/hpfs_fn.h		patch \| blob \| history
fs/hpfs/namei.c		patch \| blob \| history
fs/hpfs/super.c		patch \| blob \| history
fs/inode.c		patch \| blob \| history
fs/internal.h		patch \| blob \| history
fs/ioctl.c		patch \| blob \| history
fs/isofs/inode.c		patch \| blob \| history
fs/jbd/transaction.c		patch \| blob \| history
fs/libfs.c		patch \| blob \| history
fs/locks.c		patch \| blob \| history
fs/mount.h		patch \| blob \| history
fs/namei.c		patch \| blob \| history
fs/namespace.c		patch \| blob \| history
fs/ncpfs/dir.c		patch \| blob \| history
fs/ncpfs/file.c		patch \| blob \| history
fs/ncpfs/inode.c		patch \| blob \| history
fs/ncpfs/ncp_fs_sb.h		patch \| blob \| history
fs/nfs/dir.c		patch \| blob \| history
fs/nfs/direct.c		patch \| blob \| history
fs/nfs/file.c		patch \| blob \| history
fs/nfs/namespace.c		patch \| blob \| history
fs/nfs/nfs3proc.c		patch \| blob \| history
fs/nfs/nfs4file.c		patch \| blob \| history
fs/nfs/nfs4namespace.c		patch \| blob \| history
fs/nfs/nfs4proc.c		patch \| blob \| history
fs/nfs/proc.c		patch \| blob \| history
fs/nfs/unlink.c		patch \| blob \| history
fs/nfs/write.c		patch \| blob \| history
fs/nfsd/nfs4recover.c		patch \| blob \| history
fs/nfsd/nfs4state.c		patch \| blob \| history
fs/nfsd/nfsfh.c		patch \| blob \| history
fs/nfsd/nfsfh.h		patch \| blob \| history
fs/nfsd/vfs.c		patch \| blob \| history
fs/ntfs/inode.c		patch \| blob \| history
fs/ocfs2/inode.c		patch \| blob \| history
fs/open.c		patch \| blob \| history
fs/pnode.c		patch \| blob \| history
fs/proc/self.c		patch \| blob \| history
fs/proc_namespace.c		patch \| blob \| history
fs/qnx4/namei.c		patch \| blob \| history
fs/quota/quota.c		patch \| blob \| history
fs/read_write.c		patch \| blob \| history
fs/readdir.c		patch \| blob \| history
fs/select.c		patch \| blob \| history
fs/splice.c		patch \| blob \| history
fs/stat.c		patch \| blob \| history
fs/super.c		patch \| blob \| history
fs/sync.c		patch \| blob \| history
fs/ubifs/debug.c		patch \| blob \| history
fs/ubifs/dir.c		patch \| blob \| history
fs/ubifs/gc.c		patch \| blob \| history
fs/ubifs/journal.c		patch \| blob \| history
fs/ubifs/super.c		patch \| blob \| history
fs/ubifs/xattr.c		patch \| blob \| history
fs/udf/super.c		patch \| blob \| history
fs/utimes.c		patch \| blob \| history
include/asm-generic/siginfo.h		patch \| blob \| history
include/linux/anon_inodes.h		patch \| blob \| history
include/linux/ata.h		patch \| blob \| history
include/linux/binfmts.h		patch \| blob \| history
include/linux/cgroup.h		patch \| blob \| history
include/linux/compat.h		patch \| blob \| history
include/linux/coredump.h		patch \| blob \| history
include/linux/dcache.h		patch \| blob \| history
include/linux/elf.h		patch \| blob \| history
include/linux/elfcore.h		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
include/linux/lglock.h		patch \| blob \| history
include/linux/miscdevice.h		patch \| blob \| history
include/linux/mount.h		patch \| blob \| history
include/linux/namei.h		patch \| blob \| history
include/linux/percpu.h		patch \| blob \| history
include/linux/pid_namespace.h		patch \| blob \| history
include/trace/events/f2fs.h		patch \| blob \| history
ipc/mqueue.c		patch \| blob \| history
kernel/cgroup.c		patch \| blob \| history
kernel/elfcore.c		patch \| blob \| history
kernel/pid_namespace.c		patch \| blob \| history
kernel/signal.c		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mmap.c		patch \| blob \| history
mm/nommu.c		patch \| blob \| history
mm/percpu.c		patch \| blob \| history
net/9p/trans_fd.c		patch \| blob \| history
net/sunrpc/rpc_pipe.c		patch \| blob \| history
security/device_cgroup.c		patch \| blob \| history
sound/core/sound.c		patch \| blob \| history
sound/sound_core.c		patch \| blob \| history