X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=fs%2Fbuffer.c;h=0e5ec371ce727e8fcccd79d33108b9aca1f051ca;hb=093eda3ce5dc3758c9a5e806ea6573bfffed3ff7;hp=aecd057cd0e0607631201d93be38bda0b9e44001;hpb=ba7cc09c9c9e29a57045dc5bbf843ac1cfad3283;p=cascardo%2Flinux.git

diff --git a/fs/buffer.c b/fs/buffer.c
index aecd057cd0e0..0e5ec371ce72 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -356,7 +356,7 @@ static void free_more_memory(void)
 	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS);
+			try_to_free_pages(zones, 0, GFP_NOFS);
 	}
 }
 
@@ -675,6 +675,39 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 }
 EXPORT_SYMBOL(mark_buffer_dirty_inode);
 
+/*
+ * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
+ * dirty.
+ *
+ * If warn is true, then emit a warning if the page is not uptodate and has
+ * not been truncated.
+ */
+static int __set_page_dirty(struct page *page,
+		struct address_space *mapping, int warn)
+{
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	if (TestSetPageDirty(page))
+		return 0;
+
+	write_lock_irq(&mapping->tree_lock);
+	if (page->mapping) {	/* Race with truncate? */
+		WARN_ON_ONCE(warn && !PageUptodate(page));
+
+		if (mapping_cap_account_dirty(mapping)) {
+			__inc_zone_page_state(page, NR_FILE_DIRTY);
+			task_io_account_write(PAGE_CACHE_SIZE);
+		}
+		radix_tree_tag_set(&mapping->page_tree,
+				page_index(page), PAGECACHE_TAG_DIRTY);
+	}
+	write_unlock_irq(&mapping->tree_lock);
+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+	return 1;
+}
+
 /*
  * Add a page to the dirty page list.
  *
@@ -702,7 +735,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  */
 int __set_page_dirty_buffers(struct page *page)
 {
-	struct address_space * const mapping = page_mapping(page);
+	struct address_space *mapping = page_mapping(page);
 
 	if (unlikely(!mapping))
 		return !TestSetPageDirty(page);
@@ -719,21 +752,7 @@ int __set_page_dirty_buffers(struct page *page)
 	}
 	spin_unlock(&mapping->private_lock);
 
-	if (TestSetPageDirty(page))
-		return 0;
-
-	write_lock_irq(&mapping->tree_lock);
-	if (page->mapping) {	/* Race with truncate? */
-		if (mapping_cap_account_dirty(mapping)) {
-			__inc_zone_page_state(page, NR_FILE_DIRTY);
-			task_io_account_write(PAGE_CACHE_SIZE);
-		}
-		radix_tree_tag_set(&mapping->page_tree,
-				page_index(page), PAGECACHE_TAG_DIRTY);
-	}
-	write_unlock_irq(&mapping->tree_lock);
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return 1;
+	return __set_page_dirty(page, mapping, 1);
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
 
@@ -981,7 +1000,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	struct page *page;
 	struct buffer_head *bh;
 
-	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, index,
+		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
 	if (!page)
 		return NULL;
 
@@ -1025,11 +1045,6 @@ failed:
 /*
  * Create buffers for the specified block device block's page.  If
  * that page was dirty, the buffers are set dirty also.
- *
- * Except that's a bug.  Attaching dirty buffers to a dirty
- * blockdev's page can result in filesystem corruption, because
- * some of those buffers may be aliases of filesystem data.
- * grow_dev_page() will go BUG() if this happens.
  */
 static int
 grow_buffers(struct block_device *bdev, sector_t block, int size)
@@ -1136,8 +1151,9 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  */
 void fastcall mark_buffer_dirty(struct buffer_head *bh)
 {
+	WARN_ON_ONCE(!buffer_uptodate(bh));
 	if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
-		__set_page_dirty_nobuffers(bh->b_page);
+		__set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
 }
 
 /*
@@ -2100,7 +2116,7 @@ int cont_prepare_write(struct page *page, unsigned offset,
 						PAGE_CACHE_SIZE, get_block);
 		if (status)
 			goto out_unmap;
-		zero_user_page(page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
+		zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
 				KM_USER0);
 		generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
 		unlock_page(new_page);
@@ -2178,6 +2194,52 @@ int generic_commit_write(struct file *file, struct page *page,
 	return 0;
 }
 
+/*
+ * block_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+		   get_block_t get_block)
+{
+	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	unsigned long end;
+	loff_t size;
+	int ret = -EINVAL;
+
+	lock_page(page);
+	size = i_size_read(inode);
+	if ((page->mapping != inode->i_mapping) ||
+	    (page_offset(page) > size)) {
+		/* page got truncated out from underneath us */
+		goto out_unlock;
+	}
+
+	/* page is wholly or partially inside EOF */
+	if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+		end = size & ~PAGE_CACHE_MASK;
+	else
+		end = PAGE_CACHE_SIZE;
+
+	ret = block_prepare_write(page, 0, end, get_block);
+	if (!ret)
+		ret = block_commit_write(page, 0, end);
+
+out_unlock:
+	unlock_page(page);
+	return ret;
+}
 
 /*
  * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
@@ -2898,8 +2960,9 @@ static void recalc_bh_state(void)
 
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
-	struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
+	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
 	if (ret) {
+		INIT_LIST_HEAD(&ret->b_assoc_buffers);
 		get_cpu_var(bh_accounting).nr++;
 		recalc_bh_state();
 		put_cpu_var(bh_accounting);
@@ -2918,17 +2981,6 @@ void free_buffer_head(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(free_buffer_head);
 
-static void
-init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
-{
-	if (flags & SLAB_CTOR_CONSTRUCTOR) {
-		struct buffer_head * bh = (struct buffer_head *)data;
-
-		memset(bh, 0, sizeof(*bh));
-		INIT_LIST_HEAD(&bh->b_assoc_buffers);
-	}
-}
-
 static void buffer_exit_cpu(int cpu)
 {
 	int i;
@@ -2955,12 +3007,8 @@ void __init buffer_init(void)
 {
 	int nrpages;
 
-	bh_cachep = kmem_cache_create("buffer_head",
-			sizeof(struct buffer_head), 0,
-			(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
-				SLAB_MEM_SPREAD),
-			init_buffer_head,
-			NULL);
+	bh_cachep = KMEM_CACHE(buffer_head,
+			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
 
 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL
@@ -2975,6 +3023,7 @@ EXPORT_SYMBOL(__brelse);
 EXPORT_SYMBOL(__wait_on_buffer);
 EXPORT_SYMBOL(block_commit_write);
 EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_page_mkwrite);
 EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
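
Note (not part of the patch above): the diff adds and exports block_page_mkwrite() but contains no caller. As a rough orientation only, the sketch below shows how a filesystem of this era might wire it into its ->page_mkwrite handler, assuming the 2.6.23-vintage vm_operations_struct where page_mkwrite takes (vma, page); the "myfs" names and its get_block callback are hypothetical, not taken from this patch.

/*
 * Illustrative sketch only -- hypothetical "myfs" filesystem glue.
 * Assumes .fault/.page_mkwrite as they existed around 2.6.23.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/buffer_head.h>

/* Block-mapping callback; defined elsewhere in the (hypothetical) filesystem. */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

/* Called on the first write fault to a page in a shared mapping. */
static int myfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	/* Allocate/map blocks (so ENOSPC is reported here) and dirty the buffers. */
	return block_page_mkwrite(vma, page, myfs_get_block);
}

static struct vm_operations_struct myfs_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= myfs_page_mkwrite,
};

static int myfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &myfs_file_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR;
	return 0;
}

With this hook in place, a write fault on a mapped hole goes through block_page_mkwrite(), which locks the page, rechecks it against EOF, and runs block_prepare_write()/block_commit_write() so the block allocation either succeeds or the fault fails cleanly instead of corrupting or silently dropping data at writeback time.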