ext4: fix deadlock in journal_unmap_buffer()

author Jan Kara <jack@suse.cz>

Tue, 25 Dec 2012 18:29:52 +0000 (13:29 -0500)

committer Theodore Ts'o <tytso@mit.edu>

Tue, 25 Dec 2012 18:29:52 +0000 (13:29 -0500)
author Jan Kara <jack@suse.cz>
Tue, 25 Dec 2012 18:29:52 +0000 (13:29 -0500)
committer Theodore Ts'o <tytso@mit.edu>
Tue, 25 Dec 2012 18:29:52 +0000 (13:29 -0500)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 12d3fbc..cbfe13b 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
         block_invalidatepage(page, offset);
  }
  
-static void ext4_journalled_invalidatepage(struct page *page,
-                                          unsigned long offset)
+static int __ext4_journalled_invalidatepage(struct page *page,
+                                           unsigned long offset)
  {
         journal_t *journal = EXT4_JOURNAL(page->mapping->host);
  
@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
         if (offset == 0)
                 ClearPageChecked(page);
  
-       jbd2_journal_invalidatepage(journal, page, offset);
+       return jbd2_journal_invalidatepage(journal, page, offset);
+}
+
+/* Wrapper for aops... */
+static void ext4_journalled_invalidatepage(struct page *page,
+                                          unsigned long offset)
+{
+       WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
  }
  
  static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
         return err;
  }
  
+/*
+ * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
+ * buffers that are attached to a page stradding i_size and are undergoing
+ * commit. In that case we have to wait for commit to finish and try again.
+ */
+static void ext4_wait_for_tail_page_commit(struct inode *inode)
+{
+       struct page *page;
+       unsigned offset;
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+       tid_t commit_tid = 0;
+       int ret;
+
+       offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+       /*
+        * All buffers in the last page remain valid? Then there's nothing to
+        * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+        * blocksize case
+        */
+       if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+               return;
+       while (1) {
+               page = find_lock_page(inode->i_mapping,
+                                     inode->i_size >> PAGE_CACHE_SHIFT);
+               if (!page)
+                       return;
+               ret = __ext4_journalled_invalidatepage(page, offset);
+               unlock_page(page);
+               page_cache_release(page);
+               if (ret != -EBUSY)
+                       return;
+               commit_tid = 0;
+               read_lock(&journal->j_state_lock);
+               if (journal->j_committing_transaction)
+                       commit_tid = journal->j_committing_transaction->t_tid;
+               read_unlock(&journal->j_state_lock);
+               if (commit_tid)
+                       jbd2_log_wait_commit(journal, commit_tid);
+       }
+}
+
  /*
   * ext4_setattr()
   *
@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
         }
  
         if (attr->ia_valid & ATTR_SIZE) {
-               if (attr->ia_size != i_size_read(inode)) {
-                       truncate_setsize(inode, attr->ia_size);
-                       /* Inode size will be reduced, wait for dio in flight.
-                        * Temporarily disable dioread_nolock to prevent
-                        * livelock. */
+               if (attr->ia_size != inode->i_size) {
+                       loff_t oldsize = inode->i_size;
+
+                       i_size_write(inode, attr->ia_size);
+                       /*
+                        * Blocks are going to be removed from the inode. Wait
+                        * for dio in flight.  Temporarily disable
+                        * dioread_nolock to prevent livelock.
+                        */
                         if (orphan) {
-                               ext4_inode_block_unlocked_dio(inode);
-                               inode_dio_wait(inode);
-                               ext4_inode_resume_unlocked_dio(inode);
+                               if (!ext4_should_journal_data(inode)) {
+                                       ext4_inode_block_unlocked_dio(inode);
+                                       inode_dio_wait(inode);
+                                       ext4_inode_resume_unlocked_dio(inode);
+                               } else
+                                       ext4_wait_for_tail_page_commit(inode);
                         }
+                       /*
+                        * Truncate pagecache after we've waited for commit
+                        * in data=journal mode to make pages freeable.
+                        */
+                       truncate_pagecache(inode, oldsize, inode->i_size);
                 }
                 ext4_truncate(inode);
         }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c

index cd4485d..ddc51a7 100644 (file)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
  
         BUFFER_TRACE(bh, "entry");
  
-retry:
         /*
          * It is safe to proceed here without the j_list_lock because the
          * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1935,14 +1934,11 @@ retry:
                  * for commit and try again.
                  */
                 if (partial_page) {
-                       tid_t tid = journal->j_committing_transaction->t_tid;
-
                         jbd2_journal_put_journal_head(jh);
                         spin_unlock(&journal->j_list_lock);
                         jbd_unlock_bh_state(bh);
                         write_unlock(&journal->j_state_lock);
-                       jbd2_log_wait_commit(journal, tid);
-                       goto retry;
+                       return -EBUSY;
                 }
                 /*
                  * OK, buffer won't be reachable after truncate. We just set
@@ -2003,21 +1999,23 @@ zap_buffer_unlocked:
   * @page:    page to flush
   * @offset:  length of page to invalidate.
   *
- * Reap page buffers containing data after offset in page.
- *
+ * Reap page buffers containing data after offset in page. Can return -EBUSY
+ * if buffers are part of the committing transaction and the page is straddling
+ * i_size. Caller then has to wait for current commit and try again.
   */
-void jbd2_journal_invalidatepage(journal_t *journal,
-                     struct page *page,
-                     unsigned long offset)
+int jbd2_journal_invalidatepage(journal_t *journal,
+                               struct page *page,
+                               unsigned long offset)
  {
         struct buffer_head *head, *bh, *next;
         unsigned int curr_off = 0;
         int may_free = 1;
+       int ret = 0;
  
         if (!PageLocked(page))
                 BUG();
         if (!page_has_buffers(page))
-               return;
+               return 0;
  
         /* We will potentially be playing with lists other than just the
          * data lists (especially for journaled data mode), so be
@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
                 if (offset <= curr_off) {
                         /* This block is wholly outside the truncation point */
                         lock_buffer(bh);
-                       may_free &= journal_unmap_buffer(journal, bh,
-                                                        offset > 0);
+                       ret = journal_unmap_buffer(journal, bh, offset > 0);
                         unlock_buffer(bh);
+                       if (ret < 0)
+                               return ret;
+                       may_free &= ret;
                 }
                 curr_off = next_off;
                 bh = next;
@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
                 if (may_free && try_to_free_buffers(page))
                         J_ASSERT(!page_has_buffers(page));
         }
+       return 0;
  }
  
  /*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index 1be23d9..e30b663 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1098,7 +1098,7 @@ void               jbd2_journal_set_triggers(struct buffer_head *,
  extern int      jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
  extern int      jbd2_journal_forget (handle_t *, struct buffer_head *);
  extern void     journal_sync_buffer (struct buffer_head *);
-extern void     jbd2_journal_invalidatepage(journal_t *,
+extern int      jbd2_journal_invalidatepage(journal_t *,
                                 struct page *, unsigned long);
  extern int      jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
  extern int      jbd2_journal_stop(handle_t *);
author	Jan Kara <jack@suse.cz>
	Tue, 25 Dec 2012 18:29:52 +0000 (13:29 -0500)
committer	Theodore Ts'o <tytso@mit.edu>
	Tue, 25 Dec 2012 18:29:52 +0000 (13:29 -0500)
fs/ext4/inode.c		patch \| blob \| history
fs/jbd2/transaction.c		patch \| blob \| history
include/linux/jbd2.h		patch \| blob \| history