Merge branch 'akpm' (patches from Andrew)
[cascardo/linux.git] / fs / ext4 / xattr.c
1 /*
2  * linux/fs/ext4/xattr.c
3  *
4  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5  *
6  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7  * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8  * Extended attributes for symlinks and special files added per
9  *  suggestion of Luka Renko <luka.renko@hermes.si>.
10  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
11  *  Red Hat Inc.
12  * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
13  *  and Andreas Gruenbacher <agruen@suse.de>.
14  */
15
16 /*
17  * Extended attributes are stored directly in inodes (on file systems with
18  * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
19  * field contains the block number if an inode uses an additional block. All
20  * attributes must fit in the inode and one additional block. Blocks that
21  * contain the identical set of attributes may be shared among several inodes.
22  * Identical blocks are detected by keeping a cache of blocks that have
23  * recently been accessed.
24  *
25  * The attributes in inodes and on blocks have a different header; the entries
26  * are stored in the same format:
27  *
28  *   +------------------+
29  *   | header           |
30  *   | entry 1          | |
31  *   | entry 2          | | growing downwards
32  *   | entry 3          | v
33  *   | four null bytes  |
34  *   | . . .            |
35  *   | value 1          | ^
36  *   | value 3          | | growing upwards
37  *   | value 2          | |
38  *   +------------------+
39  *
40  * The header is followed by multiple entry descriptors. In disk blocks, the
41  * entry descriptors are kept sorted. In inodes, they are unsorted. The
42  * attribute values are aligned to the end of the block in no specific order.
43  *
44  * Locking strategy
45  * ----------------
46  * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
47  * EA blocks are only changed if they are exclusive to an inode, so
48  * holding xattr_sem also means that nothing but the EA block's reference
49  * count can change. Multiple writers to the same block are synchronized
50  * by the buffer lock.
51  */
52
53 #include <linux/init.h>
54 #include <linux/fs.h>
55 #include <linux/slab.h>
56 #include <linux/mbcache.h>
57 #include <linux/quotaops.h>
58 #include "ext4_jbd2.h"
59 #include "ext4.h"
60 #include "xattr.h"
61 #include "acl.h"
62
/*
 * Debug tracing helpers.
 *
 * With EXT4_XATTR_DEBUG defined, ea_idebug() prints a KERN_DEBUG prefix made
 * of the superblock id and inode number, and ea_bdebug() prints a prefix made
 * of the block device and block number; both then print the caller's
 * formatted message followed by a newline, as three separate printk() calls.
 *
 * Without EXT4_XATTR_DEBUG both macros hand only the format string and its
 * arguments to no_printk(); the inode / bh argument is not used.
 */
#ifdef EXT4_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
                printk(KERN_DEBUG "inode %s:%lu: ", \
                        inode->i_sb->s_id, inode->i_ino); \
                printk(f); \
                printk("\n"); \
        } while (0)
# define ea_bdebug(bh, f...) do { \
                printk(KERN_DEBUG "block %pg:%lu: ",               \
                       bh->b_bdev, (unsigned long) bh->b_blocknr); \
                printk(f); \
                printk("\n"); \
        } while (0)
#else
# define ea_idebug(inode, fmt, ...)     no_printk(fmt, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)        no_printk(fmt, ##__VA_ARGS__)
#endif
80
/*
 * Forward declarations of static helpers, so that they can be called ahead of
 * their definitions.
 */
static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
static struct buffer_head *ext4_xattr_cache_find(struct inode *,
                                                 struct ext4_xattr_header *,
                                                 struct mb_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
                              struct ext4_xattr_entry *);
static int ext4_xattr_list(struct dentry *dentry, char *buffer,
                           size_t buffer_size);
89
/*
 * Table mapping an attribute name index (EXT4_XATTR_INDEX_*) to the handler
 * for that namespace; consulted by ext4_xattr_handler().  Slots that have no
 * initializer here are NULL.  The POSIX ACL and security slots are only
 * populated when the corresponding config option is enabled.
 */
static const struct xattr_handler *ext4_xattr_handler_map[] = {
        [EXT4_XATTR_INDEX_USER]              = &ext4_xattr_user_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
        [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
        [EXT4_XATTR_INDEX_TRUSTED]           = &ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_SECURITY
        [EXT4_XATTR_INDEX_SECURITY]          = &ext4_xattr_security_handler,
#endif
};
101
/*
 * NULL-terminated list of every xattr handler compiled into this build.
 * It has external linkage, so it is meant for use outside this file
 * (NOTE(review): presumably installed as the superblock's handler list --
 * confirm at the point of use).
 */
const struct xattr_handler *ext4_xattr_handlers[] = {
        &ext4_xattr_user_handler,
        &ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        &posix_acl_access_xattr_handler,
        &posix_acl_default_xattr_handler,
#endif
#ifdef CONFIG_EXT4_FS_SECURITY
        &ext4_xattr_security_handler,
#endif
        NULL
};
114
/*
 * Fetch the mbcache pointer kept in the ext4 superblock info of the file
 * system that @inode belongs to.  The argument is parenthesized so that any
 * pointer expression (e.g. "&obj" or "array + 1"), not only a plain
 * identifier, expands correctly.
 */
#define EXT4_GET_MB_CACHE(inode)        (((struct ext4_sb_info *) \
                                (inode)->i_sb->s_fs_info)->s_mb_cache)
117
118 static __le32 ext4_xattr_block_csum(struct inode *inode,
119                                     sector_t block_nr,
120                                     struct ext4_xattr_header *hdr)
121 {
122         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
123         __u32 csum;
124         __le64 dsk_block_nr = cpu_to_le64(block_nr);
125         __u32 dummy_csum = 0;
126         int offset = offsetof(struct ext4_xattr_header, h_checksum);
127
128         csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
129                            sizeof(dsk_block_nr));
130         csum = ext4_chksum(sbi, csum, (__u8 *)hdr, offset);
131         csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
132         offset += sizeof(dummy_csum);
133         csum = ext4_chksum(sbi, csum, (__u8 *)hdr + offset,
134                            EXT4_BLOCK_SIZE(inode->i_sb) - offset);
135
136         return cpu_to_le32(csum);
137 }
138
139 static int ext4_xattr_block_csum_verify(struct inode *inode,
140                                         sector_t block_nr,
141                                         struct ext4_xattr_header *hdr)
142 {
143         if (ext4_has_metadata_csum(inode->i_sb) &&
144             (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
145                 return 0;
146         return 1;
147 }
148
149 static void ext4_xattr_block_csum_set(struct inode *inode,
150                                       sector_t block_nr,
151                                       struct ext4_xattr_header *hdr)
152 {
153         if (!ext4_has_metadata_csum(inode->i_sb))
154                 return;
155
156         hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
157 }
158
159 static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
160                                                 struct inode *inode,
161                                                 struct buffer_head *bh)
162 {
163         ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
164         return ext4_handle_dirty_metadata(handle, inode, bh);
165 }
166
167 static inline const struct xattr_handler *
168 ext4_xattr_handler(int name_index)
169 {
170         const struct xattr_handler *handler = NULL;
171
172         if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
173                 handler = ext4_xattr_handler_map[name_index];
174         return handler;
175 }
176
/*
 * Inode operation listxattr()
 *
 * Thin wrapper that forwards to ext4_xattr_list() and returns its result:
 * the number of bytes used / required, or a negative error number.
 *
 * d_inode(dentry)->i_mutex: don't care
 */
ssize_t
ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
        return ext4_xattr_list(dentry, buffer, size);
}
187
188 static int
189 ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
190                        void *value_start)
191 {
192         struct ext4_xattr_entry *e = entry;
193
194         while (!IS_LAST_ENTRY(e)) {
195                 struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
196                 if ((void *)next >= end)
197                         return -EFSCORRUPTED;
198                 e = next;
199         }
200
201         while (!IS_LAST_ENTRY(entry)) {
202                 if (entry->e_value_block != 0)
203                         return -EFSCORRUPTED;
204                 if (entry->e_value_size != 0 &&
205                     (value_start + le16_to_cpu(entry->e_value_offs) <
206                      (void *)e + sizeof(__u32) ||
207                      value_start + le16_to_cpu(entry->e_value_offs) +
208                     le32_to_cpu(entry->e_value_size) > end))
209                         return -EFSCORRUPTED;
210                 entry = EXT4_XATTR_NEXT(entry);
211         }
212
213         return 0;
214 }
215
216 static inline int
217 ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
218 {
219         int error;
220
221         if (buffer_verified(bh))
222                 return 0;
223
224         if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
225             BHDR(bh)->h_blocks != cpu_to_le32(1))
226                 return -EFSCORRUPTED;
227         if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
228                 return -EFSBADCRC;
229         error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
230                                        bh->b_data);
231         if (!error)
232                 set_buffer_verified(bh);
233         return error;
234 }
235
236 static int
237 __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
238                          void *end, const char *function, unsigned int line)
239 {
240         struct ext4_xattr_entry *entry = IFIRST(header);
241         int error = -EFSCORRUPTED;
242
243         if (((void *) header >= end) ||
244             (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC)))
245                 goto errout;
246         error = ext4_xattr_check_names(entry, end, entry);
247 errout:
248         if (error)
249                 __ext4_error_inode(inode, function, line, 0,
250                                    "corrupted in-inode xattr");
251         return error;
252 }
253
/*
 * Convenience wrapper around __xattr_check_inode() that supplies the calling
 * function's name and line number for the error report.
 */
#define xattr_check_inode(inode, header, end) \
        __xattr_check_inode((inode), (header), (end), __func__, __LINE__)
256
257 static inline int
258 ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
259 {
260         size_t value_size = le32_to_cpu(entry->e_value_size);
261
262         if (entry->e_value_block != 0 || value_size > size ||
263             le16_to_cpu(entry->e_value_offs) + value_size > size)
264                 return -EFSCORRUPTED;
265         return 0;
266 }
267
268 static int
269 ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
270                       const char *name, size_t size, int sorted)
271 {
272         struct ext4_xattr_entry *entry;
273         size_t name_len;
274         int cmp = 1;
275
276         if (name == NULL)
277                 return -EINVAL;
278         name_len = strlen(name);
279         entry = *pentry;
280         for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
281                 cmp = name_index - entry->e_name_index;
282                 if (!cmp)
283                         cmp = name_len - entry->e_name_len;
284                 if (!cmp)
285                         cmp = memcmp(name, entry->e_name, name_len);
286                 if (cmp <= 0 && (sorted || cmp == 0))
287                         break;
288         }
289         *pentry = entry;
290         if (!cmp && ext4_xattr_check_entry(entry, size))
291                 return -EFSCORRUPTED;
292         return cmp ? -ENODATA : 0;
293 }
294
295 static int
296 ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
297                      void *buffer, size_t buffer_size)
298 {
299         struct buffer_head *bh = NULL;
300         struct ext4_xattr_entry *entry;
301         size_t size;
302         int error;
303         struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
304
305         ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
306                   name_index, name, buffer, (long)buffer_size);
307
308         error = -ENODATA;
309         if (!EXT4_I(inode)->i_file_acl)
310                 goto cleanup;
311         ea_idebug(inode, "reading block %llu",
312                   (unsigned long long)EXT4_I(inode)->i_file_acl);
313         bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
314         if (!bh)
315                 goto cleanup;
316         ea_bdebug(bh, "b_count=%d, refcount=%d",
317                 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
318         if (ext4_xattr_check_block(inode, bh)) {
319 bad_block:
320                 EXT4_ERROR_INODE(inode, "bad block %llu",
321                                  EXT4_I(inode)->i_file_acl);
322                 error = -EFSCORRUPTED;
323                 goto cleanup;
324         }
325         ext4_xattr_cache_insert(ext4_mb_cache, bh);
326         entry = BFIRST(bh);
327         error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
328         if (error == -EFSCORRUPTED)
329                 goto bad_block;
330         if (error)
331                 goto cleanup;
332         size = le32_to_cpu(entry->e_value_size);
333         if (buffer) {
334                 error = -ERANGE;
335                 if (size > buffer_size)
336                         goto cleanup;
337                 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
338                        size);
339         }
340         error = size;
341
342 cleanup:
343         brelse(bh);
344         return error;
345 }
346
347 int
348 ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
349                      void *buffer, size_t buffer_size)
350 {
351         struct ext4_xattr_ibody_header *header;
352         struct ext4_xattr_entry *entry;
353         struct ext4_inode *raw_inode;
354         struct ext4_iloc iloc;
355         size_t size;
356         void *end;
357         int error;
358
359         if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
360                 return -ENODATA;
361         error = ext4_get_inode_loc(inode, &iloc);
362         if (error)
363                 return error;
364         raw_inode = ext4_raw_inode(&iloc);
365         header = IHDR(inode, raw_inode);
366         entry = IFIRST(header);
367         end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
368         error = xattr_check_inode(inode, header, end);
369         if (error)
370                 goto cleanup;
371         error = ext4_xattr_find_entry(&entry, name_index, name,
372                                       end - (void *)entry, 0);
373         if (error)
374                 goto cleanup;
375         size = le32_to_cpu(entry->e_value_size);
376         if (buffer) {
377                 error = -ERANGE;
378                 if (size > buffer_size)
379                         goto cleanup;
380                 memcpy(buffer, (void *)IFIRST(header) +
381                        le16_to_cpu(entry->e_value_offs), size);
382         }
383         error = size;
384
385 cleanup:
386         brelse(iloc.bh);
387         return error;
388 }
389
390 /*
391  * ext4_xattr_get()
392  *
393  * Copy an extended attribute into the buffer
394  * provided, or compute the buffer size required.
395  * Buffer is NULL to compute the size of the buffer required.
396  *
397  * Returns a negative error number on failure, or the number of bytes
398  * used / required on success.
399  */
400 int
401 ext4_xattr_get(struct inode *inode, int name_index, const char *name,
402                void *buffer, size_t buffer_size)
403 {
404         int error;
405
406         if (strlen(name) > 255)
407                 return -ERANGE;
408
409         down_read(&EXT4_I(inode)->xattr_sem);
410         error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
411                                      buffer_size);
412         if (error == -ENODATA)
413                 error = ext4_xattr_block_get(inode, name_index, name, buffer,
414                                              buffer_size);
415         up_read(&EXT4_I(inode)->xattr_sem);
416         return error;
417 }
418
419 static int
420 ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
421                         char *buffer, size_t buffer_size)
422 {
423         size_t rest = buffer_size;
424
425         for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
426                 const struct xattr_handler *handler =
427                         ext4_xattr_handler(entry->e_name_index);
428
429                 if (handler && (!handler->list || handler->list(dentry))) {
430                         const char *prefix = handler->prefix ?: handler->name;
431                         size_t prefix_len = strlen(prefix);
432                         size_t size = prefix_len + entry->e_name_len + 1;
433
434                         if (buffer) {
435                                 if (size > rest)
436                                         return -ERANGE;
437                                 memcpy(buffer, prefix, prefix_len);
438                                 buffer += prefix_len;
439                                 memcpy(buffer, entry->e_name, entry->e_name_len);
440                                 buffer += entry->e_name_len;
441                                 *buffer++ = 0;
442                         }
443                         rest -= size;
444                 }
445         }
446         return buffer_size - rest;  /* total size */
447 }
448
449 static int
450 ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
451 {
452         struct inode *inode = d_inode(dentry);
453         struct buffer_head *bh = NULL;
454         int error;
455         struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
456
457         ea_idebug(inode, "buffer=%p, buffer_size=%ld",
458                   buffer, (long)buffer_size);
459
460         error = 0;
461         if (!EXT4_I(inode)->i_file_acl)
462                 goto cleanup;
463         ea_idebug(inode, "reading block %llu",
464                   (unsigned long long)EXT4_I(inode)->i_file_acl);
465         bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
466         error = -EIO;
467         if (!bh)
468                 goto cleanup;
469         ea_bdebug(bh, "b_count=%d, refcount=%d",
470                 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
471         if (ext4_xattr_check_block(inode, bh)) {
472                 EXT4_ERROR_INODE(inode, "bad block %llu",
473                                  EXT4_I(inode)->i_file_acl);
474                 error = -EFSCORRUPTED;
475                 goto cleanup;
476         }
477         ext4_xattr_cache_insert(ext4_mb_cache, bh);
478         error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
479
480 cleanup:
481         brelse(bh);
482
483         return error;
484 }
485
486 static int
487 ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
488 {
489         struct inode *inode = d_inode(dentry);
490         struct ext4_xattr_ibody_header *header;
491         struct ext4_inode *raw_inode;
492         struct ext4_iloc iloc;
493         void *end;
494         int error;
495
496         if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
497                 return 0;
498         error = ext4_get_inode_loc(inode, &iloc);
499         if (error)
500                 return error;
501         raw_inode = ext4_raw_inode(&iloc);
502         header = IHDR(inode, raw_inode);
503         end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
504         error = xattr_check_inode(inode, header, end);
505         if (error)
506                 goto cleanup;
507         error = ext4_xattr_list_entries(dentry, IFIRST(header),
508                                         buffer, buffer_size);
509
510 cleanup:
511         brelse(iloc.bh);
512         return error;
513 }
514
515 /*
516  * ext4_xattr_list()
517  *
518  * Copy a list of attribute names into the buffer
519  * provided, or compute the buffer size required.
520  * Buffer is NULL to compute the size of the buffer required.
521  *
522  * Returns a negative error number on failure, or the number of bytes
523  * used / required on success.
524  */
525 static int
526 ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
527 {
528         int ret, ret2;
529
530         down_read(&EXT4_I(d_inode(dentry))->xattr_sem);
531         ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
532         if (ret < 0)
533                 goto errout;
534         if (buffer) {
535                 buffer += ret;
536                 buffer_size -= ret;
537         }
538         ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
539         if (ret < 0)
540                 goto errout;
541         ret += ret2;
542 errout:
543         up_read(&EXT4_I(d_inode(dentry))->xattr_sem);
544         return ret;
545 }
546
547 /*
548  * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
549  * not set, set it.
550  */
551 static void ext4_xattr_update_super_block(handle_t *handle,
552                                           struct super_block *sb)
553 {
554         if (ext4_has_feature_xattr(sb))
555                 return;
556
557         BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
558         if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
559                 ext4_set_feature_xattr(sb);
560                 ext4_handle_dirty_super(handle, sb);
561         }
562 }
563
/*
 * Release the xattr block BH: If the reference count is > 1, decrement it;
 * otherwise free the block.
 *
 * The header's hash and reference count are read, and the reference count
 * updated, with the buffer locked.  Nothing is returned: any error from
 * getting journal write access or from dirtying the block is passed to
 * ext4_std_error().
 */
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                         struct buffer_head *bh)
{
        struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
        u32 hash, ref;
        int error = 0;

        BUFFER_TRACE(bh, "get_write_access");
        error = ext4_journal_get_write_access(handle, bh);
        if (error)
                goto out;

        lock_buffer(bh);
        hash = le32_to_cpu(BHDR(bh)->h_hash);
        ref = le32_to_cpu(BHDR(bh)->h_refcount);
        if (ref == 1) {
                /* Last reference: drop the cache entry and free the block. */
                ea_bdebug(bh, "refcount now=0; freeing");
                /*
                 * This must happen under buffer lock for
                 * ext4_xattr_block_set() to reliably detect freed block
                 */
                mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
                /*
                 * Take an extra buffer reference before freeing.
                 * NOTE(review): presumably ext4_free_blocks() with
                 * EXT4_FREE_BLOCKS_FORGET drops one reference on bh --
                 * confirm against ext4_free_blocks().
                 */
                get_bh(bh);
                unlock_buffer(bh);
                ext4_free_blocks(handle, inode, bh, 0, 1,
                                 EXT4_FREE_BLOCKS_METADATA |
                                 EXT4_FREE_BLOCKS_FORGET);
        } else {
                /* Still shared (or ref was not 1): just drop one reference. */
                ref--;
                BHDR(bh)->h_refcount = cpu_to_le32(ref);
                /*
                 * The count has just dropped to one below
                 * EXT4_XATTR_REFCOUNT_MAX: flag the block's cache entry, if
                 * there is one, as reusable.
                 */
                if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
                        struct mb_cache_entry *ce;

                        ce = mb_cache_entry_get(ext4_mb_cache, hash,
                                                bh->b_blocknr);
                        if (ce) {
                                ce->e_reusable = 1;
                                mb_cache_entry_put(ext4_mb_cache, ce);
                        }
                }

                /*
                 * Beware of this ugliness: Releasing of xattr block references
                 * from different inodes can race and so we have to protect
                 * from a race where someone else frees the block (and releases
                 * its journal_head) before we are done dirtying the buffer. In
                 * nojournal mode this race is harmless and we actually cannot
                 * call ext4_handle_dirty_xattr_block() with locked buffer as
                 * that function can call sync_dirty_buffer() so for that case
                 * we handle the dirtying after unlocking the buffer.
                 */
                if (ext4_handle_valid(handle))
                        error = ext4_handle_dirty_xattr_block(handle, inode,
                                                              bh);
                unlock_buffer(bh);
                if (!ext4_handle_valid(handle))
                        error = ext4_handle_dirty_xattr_block(handle, inode,
                                                              bh);
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
                /*
                 * Give back the quota charged to this inode for the block.
                 * NOTE(review): EXT4_C2B(sbi, 1) presumably converts one
                 * cluster into a block count -- confirm.
                 */
                dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
                ea_bdebug(bh, "refcount now=%d; releasing",
                          le32_to_cpu(BHDR(bh)->h_refcount));
        }
out:
        ext4_std_error(inode->i_sb, error);
        return;
}
637
638 /*
639  * Find the available free space for EAs. This also returns the total number of
640  * bytes used by EA entries.
641  */
642 static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
643                                     size_t *min_offs, void *base, int *total)
644 {
645         for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
646                 if (last->e_value_size) {
647                         size_t offs = le16_to_cpu(last->e_value_offs);
648                         if (offs < *min_offs)
649                                 *min_offs = offs;
650                 }
651                 if (total)
652                         *total += EXT4_XATTR_LEN(last->e_name_len);
653         }
654         return (*min_offs - ((void *)last - base) - sizeof(__u32));
655 }
656
/*
 * Apply the change described by @i to the attribute area described by @s,
 * editing the area in place.
 *
 * @i: the attribute to set: name index, name, and value.  A NULL value
 *     requests removal; EXT4_ZERO_XATTR_VALUE requests a value of
 *     i->value_len zero bytes.
 * @s: the search state: s->base is the address value offsets are relative
 *     to, s->first the first entry, s->end the end of the area, s->here the
 *     matching entry (s->not_found == 0) or the position at which a new
 *     entry is to be inserted (s->not_found != 0).
 *
 * Entry descriptors grow upwards from s->first and values are packed
 * downwards from s->end, so removing a value means sliding every value
 * stored below it up by the freed size and adjusting the affected offsets.
 *
 * NOTE(review): with i->value == NULL and s->not_found set, s->here is
 * treated as an existing entry; presumably callers never ask to remove a
 * missing attribute -- confirm.
 *
 * Returns 0 on success, or -ENOSPC if a value was supplied and the area
 * cannot hold the new name and value.
 */
static int
ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
{
        struct ext4_xattr_entry *last;
        size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);

        /* Compute min_offs and last. */
        last = s->first;
        for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
                if (last->e_value_size) {
                        size_t offs = le16_to_cpu(last->e_value_offs);
                        if (offs < min_offs)
                                min_offs = offs;
                }
        }
        /* Gap between the list terminator and the lowest value. */
        free = min_offs - ((void *)last - s->base) - sizeof(__u32);
        if (!s->not_found) {
                /*
                 * Replacing or removing: the space held by the existing
                 * value and descriptor becomes available too.
                 */
                if (s->here->e_value_size) {
                        size_t size = le32_to_cpu(s->here->e_value_size);
                        free += EXT4_XATTR_SIZE(size);
                }
                free += EXT4_XATTR_LEN(name_len);
        }
        if (i->value) {
                if (free < EXT4_XATTR_LEN(name_len) +
                           EXT4_XATTR_SIZE(i->value_len))
                        return -ENOSPC;
        }

        if (i->value && s->not_found) {
                /* Insert the new name. */
                size_t size = EXT4_XATTR_LEN(name_len);
                /* Bytes from s->here through the terminator, inclusive. */
                size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
                memmove((void *)s->here + size, s->here, rest);
                memset(s->here, 0, size);
                s->here->e_name_index = i->name_index;
                s->here->e_name_len = name_len;
                memcpy(s->here->e_name, i->name, name_len);
        } else {
                if (s->here->e_value_size) {
                        void *first_val = s->base + min_offs;
                        size_t offs = le16_to_cpu(s->here->e_value_offs);
                        void *val = s->base + offs;
                        size_t size = EXT4_XATTR_SIZE(
                                le32_to_cpu(s->here->e_value_size));

                        if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
                                /* The old and the new value have the same
                                   size. Just replace. */
                                s->here->e_value_size =
                                        cpu_to_le32(i->value_len);
                                if (i->value == EXT4_ZERO_XATTR_VALUE) {
                                        memset(val, 0, size);
                                } else {
                                        /* Clear pad bytes first. */
                                        memset(val + size - EXT4_XATTR_PAD, 0,
                                               EXT4_XATTR_PAD);
                                        memcpy(val, i->value, i->value_len);
                                }
                                return 0;
                        }

                        /*
                         * Remove the old value: slide the values stored
                         * below it up by its padded size and zero the gap
                         * that opens at the bottom of the value area.
                         */
                        memmove(first_val + size, first_val, val - first_val);
                        memset(first_val, 0, size);
                        s->here->e_value_size = 0;
                        s->here->e_value_offs = 0;
                        min_offs += size;

                        /* Adjust all value offsets. */
                        last = s->first;
                        while (!IS_LAST_ENTRY(last)) {
                                size_t o = le16_to_cpu(last->e_value_offs);
                                /* Only values that sat below the old one moved. */
                                if (last->e_value_size && o < offs)
                                        last->e_value_offs =
                                                cpu_to_le16(o + size);
                                last = EXT4_XATTR_NEXT(last);
                        }
                }
                if (!i->value) {
                        /* Remove the old name. */
                        size_t size = EXT4_XATTR_LEN(name_len);
                        /* 'last' becomes the terminator's new position. */
                        last = ENTRY((void *)last - size);
                        memmove(s->here, (void *)s->here + size,
                                (void *)last - (void *)s->here + sizeof(__u32));
                        memset(last, 0, size);
                }
        }

        if (i->value) {
                /* Insert the new value. */
                s->here->e_value_size = cpu_to_le32(i->value_len);
                if (i->value_len) {
                        size_t size = EXT4_XATTR_SIZE(i->value_len);
                        /* New values go directly below the lowest value. */
                        void *val = s->base + min_offs - size;
                        s->here->e_value_offs = cpu_to_le16(min_offs - size);
                        if (i->value == EXT4_ZERO_XATTR_VALUE) {
                                memset(val, 0, size);
                        } else {
                                /* Clear the pad bytes first. */
                                memset(val + size - EXT4_XATTR_PAD, 0,
                                       EXT4_XATTR_PAD);
                                memcpy(val, i->value, i->value_len);
                        }
                }
        }
        return 0;
}
765
/*
 * Result of looking up an attribute in an inode's external xattr block, as
 * filled in by the function ext4_xattr_block_find().
 */
struct ext4_xattr_block_find {
        struct ext4_xattr_search s;     /* search state within the block */
        struct buffer_head *bh;         /* buffer holding the xattr block */
};
770
771 static int
772 ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
773                       struct ext4_xattr_block_find *bs)
774 {
775         struct super_block *sb = inode->i_sb;
776         int error;
777
778         ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
779                   i->name_index, i->name, i->value, (long)i->value_len);
780
781         if (EXT4_I(inode)->i_file_acl) {
782                 /* The inode already has an extended attribute block. */
783                 bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
784                 error = -EIO;
785                 if (!bs->bh)
786                         goto cleanup;
787                 ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
788                         atomic_read(&(bs->bh->b_count)),
789                         le32_to_cpu(BHDR(bs->bh)->h_refcount));
790                 if (ext4_xattr_check_block(inode, bs->bh)) {
791                         EXT4_ERROR_INODE(inode, "bad block %llu",
792                                          EXT4_I(inode)->i_file_acl);
793                         error = -EFSCORRUPTED;
794                         goto cleanup;
795                 }
796                 /* Find the named attribute. */
797                 bs->s.base = BHDR(bs->bh);
798                 bs->s.first = BFIRST(bs->bh);
799                 bs->s.end = bs->bh->b_data + bs->bh->b_size;
800                 bs->s.here = bs->s.first;
801                 error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
802                                               i->name, bs->bh->b_size, 1);
803                 if (error && error != -ENODATA)
804                         goto cleanup;
805                 bs->s.not_found = error;
806         }
807         error = 0;
808
809 cleanup:
810         return error;
811 }
812
/*
 * ext4_xattr_block_set()
 *
 * Apply the change described by @i (set or replace an attribute, or remove
 * it when i->value is NULL) to the inode's external xattr block described
 * by @bs, then point EXT4_I(inode)->i_file_acl at the resulting block.
 *
 * Three starting points are handled:
 *  - a block exists and its h_refcount is 1: the buffer is modified in
 *    place under the buffer lock;
 *  - a block exists with any other h_refcount: its contents are copied to
 *    a kmalloc'ed buffer (refcount reset to 1) and the copy is modified;
 *  - no block exists (s->base is NULL): an empty block image is built in a
 *    kzalloc'ed buffer.
 *
 * Afterwards, if the image still holds entries, an identical block found
 * through the mbcache is reused (quota is charged and its h_refcount is
 * incremented); failing that, the in-place buffer is kept or a new metadata
 * block is allocated and filled from the scratch buffer.  If no entries are
 * left, i_file_acl becomes 0.  The previous block is released whenever it
 * is not the one being kept.
 *
 * Returns 0 on success or a negative error number.  -ENOSPC is returned
 * immediately when the value is larger than the filesystem block size.
 */
813 static int
814 ext4_xattr_block_set(handle_t *handle, struct inode *inode,
815                      struct ext4_xattr_info *i,
816                      struct ext4_xattr_block_find *bs)
817 {
818         struct super_block *sb = inode->i_sb;
819         struct buffer_head *new_bh = NULL;
820         struct ext4_xattr_search *s = &bs->s;
821         struct mb_cache_entry *ce = NULL;
822         int error = 0;
823         struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
824
    /* Local shorthand: view a buffer as an xattr block header. */
825 #define header(x) ((struct ext4_xattr_header *)(x))
826
827         if (i->value && i->value_len > sb->s_blocksize)
828                 return -ENOSPC;
            /* s->base is non-NULL when ext4_xattr_block_find() loaded a block. */
829         if (s->base) {
830                 BUFFER_TRACE(bs->bh, "get_write_access");
831                 error = ext4_journal_get_write_access(handle, bs->bh);
832                 if (error)
833                         goto cleanup;
834                 lock_buffer(bs->bh);
835
836                 if (header(s->base)->h_refcount == cpu_to_le32(1)) {
837                         __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
838
839                         /*
840                          * This must happen under buffer lock for
841                          * ext4_xattr_block_set() to reliably detect modified
842                          * block
843                          */
844                         mb_cache_entry_delete_block(ext4_mb_cache, hash,
845                                                     bs->bh->b_blocknr);
846                         ea_bdebug(bs->bh, "modifying in-place");
847                         error = ext4_xattr_set_entry(i, s);
848                         if (!error) {
849                                 if (!IS_LAST_ENTRY(s->first))
850                                         ext4_xattr_rehash(header(s->base),
851                                                           s->here);
852                                 ext4_xattr_cache_insert(ext4_mb_cache,
853                                         bs->bh);
854                         }
855                         unlock_buffer(bs->bh);
856                         if (error == -EFSCORRUPTED)
857                                 goto bad_block;
858                         if (!error)
859                                 error = ext4_handle_dirty_xattr_block(handle,
860                                                                       inode,
861                                                                       bs->bh);
862                         if (error)
863                                 goto cleanup;
864                         goto inserted;
865                 } else {
                            /* Remember the entry's offset so s->here can be re-pointed into the copy. */
866                         int offset = (char *)s->here - bs->bh->b_data;
867
868                         unlock_buffer(bs->bh);
869                         ea_bdebug(bs->bh, "cloning");
870                         s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
871                         error = -ENOMEM;
872                         if (s->base == NULL)
873                                 goto cleanup;
874                         memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
875                         s->first = ENTRY(header(s->base)+1);
876                         header(s->base)->h_refcount = cpu_to_le32(1);
877                         s->here = ENTRY(s->base + offset);
878                         s->end = s->base + bs->bh->b_size;
879                 }
880         } else {
881                 /* Allocate a buffer where we construct the new block. */
882                 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
883                 /* assert(header == s->base) */
884                 error = -ENOMEM;
885                 if (s->base == NULL)
886                         goto cleanup;
887                 header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
888                 header(s->base)->h_blocks = cpu_to_le32(1);
889                 header(s->base)->h_refcount = cpu_to_le32(1);
890                 s->first = ENTRY(header(s->base)+1);
891                 s->here = ENTRY(header(s->base)+1);
892                 s->end = s->base + sb->s_blocksize;
893         }
894
            /* Apply the change to the scratch image (cloned or freshly built). */
895         error = ext4_xattr_set_entry(i, s);
896         if (error == -EFSCORRUPTED)
897                 goto bad_block;
898         if (error)
899                 goto cleanup;
900         if (!IS_LAST_ENTRY(s->first))
901                 ext4_xattr_rehash(header(s->base), s->here);
902
903 inserted:
            /* A block is only needed if the image still contains entries. */
904         if (!IS_LAST_ENTRY(s->first)) {
905                 new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
906                 if (new_bh) {
907                         /* We found an identical block in the cache. */
908                         if (new_bh == bs->bh)
909                                 ea_bdebug(new_bh, "keeping");
910                         else {
911                                 u32 ref;
912
913                                 /* The old block is released after updating
914                                    the inode. */
915                                 error = dquot_alloc_block(inode,
916                                                 EXT4_C2B(EXT4_SB(sb), 1));
917                                 if (error)
918                                         goto cleanup;
919                                 BUFFER_TRACE(new_bh, "get_write_access");
920                                 error = ext4_journal_get_write_access(handle,
921                                                                       new_bh);
922                                 if (error)
923                                         goto cleanup_dquot;
924                                 lock_buffer(new_bh);
925                                 /*
926                                  * We have to be careful about races with
927                                  * freeing, rehashing or adding references to
928                                  * xattr block. Once we hold buffer lock xattr
929                                  * block's state is stable so we can check
930                                  * whether the block got freed / rehashed or
931                                  * not.  Since we unhash mbcache entry under
932                                  * buffer lock when freeing / rehashing xattr
933                                  * block, checking whether entry is still
934                                  * hashed is reliable. Same rules hold for
935                                  * e_reusable handling.
936                                  */
937                                 if (hlist_bl_unhashed(&ce->e_hash_list) ||
938                                     !ce->e_reusable) {
939                                         /*
940                                          * Undo everything and check mbcache
941                                          * again.
942                                          */
943                                         unlock_buffer(new_bh);
944                                         dquot_free_block(inode,
945                                                          EXT4_C2B(EXT4_SB(sb),
946                                                                   1));
947                                         brelse(new_bh);
948                                         mb_cache_entry_put(ext4_mb_cache, ce);
949                                         ce = NULL;
950                                         new_bh = NULL;
951                                         goto inserted;
952                                 }
953                                 ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
954                                 BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
                                    /* Stop offering this block for sharing once the refcount limit is reached. */
955                                 if (ref >= EXT4_XATTR_REFCOUNT_MAX)
956                                         ce->e_reusable = 0;
957                                 ea_bdebug(new_bh, "reusing; refcount now=%d",
958                                           ref);
959                                 unlock_buffer(new_bh);
960                                 error = ext4_handle_dirty_xattr_block(handle,
961                                                                       inode,
962                                                                       new_bh);
963                                 if (error)
964                                         goto cleanup_dquot;
965                         }
966                         mb_cache_entry_touch(ext4_mb_cache, ce);
967                         mb_cache_entry_put(ext4_mb_cache, ce);
968                         ce = NULL;
969                 } else if (bs->bh && s->base == bs->bh->b_data) {
970                         /* We were modifying this block in-place. */
971                         ea_bdebug(bs->bh, "keeping this block");
972                         new_bh = bs->bh;
                            /* Extra reference balances the brelse(new_bh) at cleanup. */
973                         get_bh(new_bh);
974                 } else {
975                         /* We need to allocate a new block */
976                         ext4_fsblk_t goal, block;
977
978                         goal = ext4_group_first_block_no(sb,
979                                                 EXT4_I(inode)->i_block_group);
980
981                         /* non-extent files can't have physical blocks past 2^32 */
982                         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
983                                 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
984
985                         block = ext4_new_meta_blocks(handle, inode, goal, 0,
986                                                      NULL, &error);
987                         if (error)
988                                 goto cleanup;
989
990                         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
991                                 BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
992
993                         ea_idebug(inode, "creating block %llu",
994                                   (unsigned long long)block);
995
996                         new_bh = sb_getblk(sb, block);
997                         if (unlikely(!new_bh)) {
998                                 error = -ENOMEM;
    /* Also entered by goto from below: hand the just-allocated block back. */
999 getblk_failed:
1000                                 ext4_free_blocks(handle, inode, NULL, block, 1,
1001                                                  EXT4_FREE_BLOCKS_METADATA);
1002                                 goto cleanup;
1003                         }
1004                         lock_buffer(new_bh);
1005                         error = ext4_journal_get_create_access(handle, new_bh);
1006                         if (error) {
1007                                 unlock_buffer(new_bh);
1008                                 error = -EIO;
1009                                 goto getblk_failed;
1010                         }
1011                         memcpy(new_bh->b_data, s->base, new_bh->b_size);
1012                         set_buffer_uptodate(new_bh);
1013                         unlock_buffer(new_bh);
1014                         ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
1015                         error = ext4_handle_dirty_xattr_block(handle,
1016                                                               inode, new_bh);
1017                         if (error)
1018                                 goto cleanup;
1019                 }
1020         }
1021
1022         /* Update the inode. */
1023         EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
1024
1025         /* Drop the previous xattr block. */
1026         if (bs->bh && bs->bh != new_bh)
1027                 ext4_xattr_release_block(handle, inode, bs->bh);
1028         error = 0;
1029
1030 cleanup:
1031         if (ce)
1032                 mb_cache_entry_put(ext4_mb_cache, ce);
1033         brelse(new_bh);
             /* s->base is a private scratch buffer unless bs->bh was edited in place. */
1034         if (!(bs->bh && s->base == bs->bh->b_data))
1035                 kfree(s->base);
1036
1037         return error;
1038
     /* Error exit for the reuse path: give back the quota charged above. */
1039 cleanup_dquot:
1040         dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
1041         goto cleanup;
1042
1043 bad_block:
1044         EXT4_ERROR_INODE(inode, "bad block %llu",
1045                          EXT4_I(inode)->i_file_acl);
1046         goto cleanup;
1047
1048 #undef header
1049 }
1050
1051 int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
1052                           struct ext4_xattr_ibody_find *is)
1053 {
1054         struct ext4_xattr_ibody_header *header;
1055         struct ext4_inode *raw_inode;
1056         int error;
1057
1058         if (EXT4_I(inode)->i_extra_isize == 0)
1059                 return 0;
1060         raw_inode = ext4_raw_inode(&is->iloc);
1061         header = IHDR(inode, raw_inode);
1062         is->s.base = is->s.first = IFIRST(header);
1063         is->s.here = is->s.first;
1064         is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
1065         if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
1066                 error = xattr_check_inode(inode, header, is->s.end);
1067                 if (error)
1068                         return error;
1069                 /* Find the named attribute. */
1070                 error = ext4_xattr_find_entry(&is->s.here, i->name_index,
1071                                               i->name, is->s.end -
1072                                               (void *)is->s.base, 0);
1073                 if (error && error != -ENODATA)
1074                         return error;
1075                 is->s.not_found = error;
1076         }
1077         return 0;
1078 }
1079
1080 int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
1081                                 struct ext4_xattr_info *i,
1082                                 struct ext4_xattr_ibody_find *is)
1083 {
1084         struct ext4_xattr_ibody_header *header;
1085         struct ext4_xattr_search *s = &is->s;
1086         int error;
1087
1088         if (EXT4_I(inode)->i_extra_isize == 0)
1089                 return -ENOSPC;
1090         error = ext4_xattr_set_entry(i, s);
1091         if (error) {
1092                 if (error == -ENOSPC &&
1093                     ext4_has_inline_data(inode)) {
1094                         error = ext4_try_to_evict_inline_data(handle, inode,
1095                                         EXT4_XATTR_LEN(strlen(i->name) +
1096                                         EXT4_XATTR_SIZE(i->value_len)));
1097                         if (error)
1098                                 return error;
1099                         error = ext4_xattr_ibody_find(inode, i, is);
1100                         if (error)
1101                                 return error;
1102                         error = ext4_xattr_set_entry(i, s);
1103                 }
1104                 if (error)
1105                         return error;
1106         }
1107         header = IHDR(inode, ext4_raw_inode(&is->iloc));
1108         if (!IS_LAST_ENTRY(s->first)) {
1109                 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
1110                 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
1111         } else {
1112                 header->h_magic = cpu_to_le32(0);
1113                 ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
1114         }
1115         return 0;
1116 }
1117
1118 static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
1119                                 struct ext4_xattr_info *i,
1120                                 struct ext4_xattr_ibody_find *is)
1121 {
1122         struct ext4_xattr_ibody_header *header;
1123         struct ext4_xattr_search *s = &is->s;
1124         int error;
1125
1126         if (EXT4_I(inode)->i_extra_isize == 0)
1127                 return -ENOSPC;
1128         error = ext4_xattr_set_entry(i, s);
1129         if (error)
1130                 return error;
1131         header = IHDR(inode, ext4_raw_inode(&is->iloc));
1132         if (!IS_LAST_ENTRY(s->first)) {
1133                 header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
1134                 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
1135         } else {
1136                 header->h_magic = cpu_to_le32(0);
1137                 ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
1138         }
1139         return 0;
1140 }
1141
1142 static int ext4_xattr_value_same(struct ext4_xattr_search *s,
1143                                  struct ext4_xattr_info *i)
1144 {
1145         void *value;
1146
1147         if (le32_to_cpu(s->here->e_value_size) != i->value_len)
1148                 return 0;
1149         value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
1150         return !memcmp(value, i->value, i->value_len);
1151 }
1152
1153 /*
1154  * ext4_xattr_set_handle()
1155  *
1156  * Create, replace or remove an extended attribute for this inode.  Value
1157  * is NULL to remove an existing extended attribute, and non-NULL to
1158  * either replace an existing extended attribute, or create a new extended
1159  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
1160  * specify that an extended attribute must exist and must not exist
1161  * previous to the call, respectively.
1162  *
1163  * Returns 0, or a negative error number on failure.
1164  */
1165 int
1166 ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1167                       const char *name, const void *value, size_t value_len,
1168                       int flags)
1169 {
1170         struct ext4_xattr_info i = {
1171                 .name_index = name_index,
1172                 .name = name,
1173                 .value = value,
1174                 .value_len = value_len,
1175
1176         };
             /* Both searches start out as "not found" until a lookup says otherwise. */
1177         struct ext4_xattr_ibody_find is = {
1178                 .s = { .not_found = -ENODATA, },
1179         };
1180         struct ext4_xattr_block_find bs = {
1181                 .s = { .not_found = -ENODATA, },
1182         };
1183         unsigned long no_expand;
1184         int error;
1185
1186         if (!name)
1187                 return -EINVAL;
1188         if (strlen(name) > 255)
1189                 return -ERANGE;
1190         down_write(&EXT4_I(inode)->xattr_sem);
             /*
              * Remember whether NO_EXPAND was already set so that cleanup only
              * clears it when it was not set on entry.
              */
1191         no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
1192         ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
1193
1194         error = ext4_reserve_inode_write(handle, inode, &is.iloc);
1195         if (error)
1196                 goto cleanup;
1197
             /* New inode: zero the whole on-disk inode before using it. */
1198         if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
1199                 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
1200                 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
1201                 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
1202         }
1203
             /* Search the inode body first; the block only if the body lacks the name. */
1204         error = ext4_xattr_ibody_find(inode, &i, &is);
1205         if (error)
1206                 goto cleanup;
1207         if (is.s.not_found)
1208                 error = ext4_xattr_block_find(inode, &i, &bs);
1209         if (error)
1210                 goto cleanup;
             /* Enforce XATTR_REPLACE / XATTR_CREATE; removing a missing name is a no-op. */
1211         if (is.s.not_found && bs.s.not_found) {
1212                 error = -ENODATA;
1213                 if (flags & XATTR_REPLACE)
1214                         goto cleanup;
1215                 error = 0;
1216                 if (!value)
1217                         goto cleanup;
1218         } else {
1219                 error = -EEXIST;
1220                 if (flags & XATTR_CREATE)
1221                         goto cleanup;
1222         }
1223         if (!value) {
                     /* Removal: delete the attribute from wherever it was found. */
1224                 if (!is.s.not_found)
1225                         error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1226                 else if (!bs.s.not_found)
1227                         error = ext4_xattr_block_set(handle, inode, &i, &bs);
1228         } else {
1229                 error = 0;
1230                 /* Xattr value did not change? Save us some work and bail out */
1231                 if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
1232                         goto cleanup;
1233                 if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
1234                         goto cleanup;
1235
                     /*
                      * Prefer the inode body.  If that works and an old copy lives
                      * in the block, remove the old copy (i.value = NULL turns the
                      * next call into a removal).  If the body is full, store the
                      * value in the block and then remove any old in-inode copy.
                      */
1236                 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1237                 if (!error && !bs.s.not_found) {
1238                         i.value = NULL;
1239                         error = ext4_xattr_block_set(handle, inode, &i, &bs);
1240                 } else if (error == -ENOSPC) {
                             /*
                              * The block was not searched above when the name was
                              * found in the inode body; load it now.
                              */
1241                         if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
1242                                 error = ext4_xattr_block_find(inode, &i, &bs);
1243                                 if (error)
1244                                         goto cleanup;
1245                         }
1246                         error = ext4_xattr_block_set(handle, inode, &i, &bs);
1247                         if (error)
1248                                 goto cleanup;
1249                         if (!is.s.not_found) {
1250                                 i.value = NULL;
1251                                 error = ext4_xattr_ibody_set(handle, inode, &i,
1252                                                              &is);
1253                         }
1254                 }
1255         }
1256         if (!error) {
1257                 ext4_xattr_update_super_block(handle, inode->i_sb);
1258                 inode->i_ctime = ext4_current_time(inode);
                     /*
                      * NOTE(review): NO_EXPAND is dropped before dirtying the inode
                      * on removal, presumably because space was just freed; confirm.
                      */
1259                 if (!value)
1260                         ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1261                 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
1262                 /*
1263                  * The bh is consumed by ext4_mark_iloc_dirty, even with
1264                  * error != 0.
1265                  */
1266                 is.iloc.bh = NULL;
1267                 if (IS_SYNC(inode))
1268                         ext4_handle_sync(handle);
1269         }
1270
1271 cleanup:
             /* Both pointers may be NULL here; they are released unconditionally. */
1272         brelse(is.iloc.bh);
1273         brelse(bs.bh);
1274         if (no_expand == 0)
1275                 ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1276         up_write(&EXT4_I(inode)->xattr_sem);
1277         return error;
1278 }
1279
1280 /*
1281  * ext4_xattr_set()
1282  *
1283  * Like ext4_xattr_set_handle, but start from an inode. This extended
1284  * attribute modification is a filesystem transaction by itself.
1285  *
1286  * Returns 0, or a negative error number on failure.
1287  */
1288 int
1289 ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1290                const void *value, size_t value_len, int flags)
1291 {
1292         handle_t *handle;
1293         int error, retries = 0;
1294         int credits = ext4_jbd2_credits_xattr(inode);
1295
1296 retry:
1297         handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1298         if (IS_ERR(handle)) {
1299                 error = PTR_ERR(handle);
1300         } else {
1301                 int error2;
1302
1303                 error = ext4_xattr_set_handle(handle, inode, name_index, name,
1304                                               value, value_len, flags);
1305                 error2 = ext4_journal_stop(handle);
1306                 if (error == -ENOSPC &&
1307                     ext4_should_retry_alloc(inode->i_sb, &retries))
1308                         goto retry;
1309                 if (error == 0)
1310                         error = error2;
1311         }
1312
1313         return error;
1314 }
1315
1316 /*
1317  * Shift the EA entries in the inode to create space for the increased
1318  * i_extra_isize.
1319  */
1320 static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
1321                                      int value_offs_shift, void *to,
1322                                      void *from, size_t n)
1323 {
1324         struct ext4_xattr_entry *last = entry;
1325         int new_offs;
1326
1327         /* We always shift xattr headers further thus offsets get lower */
1328         BUG_ON(value_offs_shift > 0);
1329
1330         /* Adjust the value offsets of the entries */
1331         for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1332                 if (last->e_value_size) {
1333                         new_offs = le16_to_cpu(last->e_value_offs) +
1334                                                         value_offs_shift;
1335                         last->e_value_offs = cpu_to_le16(new_offs);
1336                 }
1337         }
1338         /* Shift the entries by n bytes */
1339         memmove(to, from, n);
1340 }
1341
1342 /*
1343  * Move xattr pointed to by 'entry' from inode into external xattr block
1344  */
1345 static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
1346                                     struct ext4_inode *raw_inode,
1347                                     struct ext4_xattr_entry *entry)
1348 {
1349         struct ext4_xattr_ibody_find *is = NULL;
1350         struct ext4_xattr_block_find *bs = NULL;
1351         char *buffer = NULL, *b_entry_name = NULL;
1352         size_t value_offs, value_size;
1353         struct ext4_xattr_info i = {
1354                 .value = NULL,
1355                 .value_len = 0,
1356                 .name_index = entry->e_name_index,
1357         };
1358         struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
1359         int error;
1360
1361         value_offs = le16_to_cpu(entry->e_value_offs);
1362         value_size = le32_to_cpu(entry->e_value_size);
1363
1364         is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
1365         bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
1366         buffer = kmalloc(value_size, GFP_NOFS);
1367         b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
1368         if (!is || !bs || !buffer || !b_entry_name) {
1369                 error = -ENOMEM;
1370                 goto out;
1371         }
1372
1373         is->s.not_found = -ENODATA;
1374         bs->s.not_found = -ENODATA;
1375         is->iloc.bh = NULL;
1376         bs->bh = NULL;
1377
1378         /* Save the entry name and the entry value */
1379         memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
1380         memcpy(b_entry_name, entry->e_name, entry->e_name_len);
1381         b_entry_name[entry->e_name_len] = '\0';
1382         i.name = b_entry_name;
1383
1384         error = ext4_get_inode_loc(inode, &is->iloc);
1385         if (error)
1386                 goto out;
1387
1388         error = ext4_xattr_ibody_find(inode, &i, is);
1389         if (error)
1390                 goto out;
1391
1392         /* Remove the chosen entry from the inode */
1393         error = ext4_xattr_ibody_set(handle, inode, &i, is);
1394         if (error)
1395                 goto out;
1396
1397         i.name = b_entry_name;
1398         i.value = buffer;
1399         i.value_len = value_size;
1400         error = ext4_xattr_block_find(inode, &i, bs);
1401         if (error)
1402                 goto out;
1403
1404         /* Add entry which was removed from the inode into the block */
1405         error = ext4_xattr_block_set(handle, inode, &i, bs);
1406         if (error)
1407                 goto out;
1408         error = 0;
1409 out:
1410         kfree(b_entry_name);
1411         kfree(buffer);
1412         if (is)
1413                 brelse(is->iloc.bh);
1414         kfree(is);
1415         kfree(bs);
1416
1417         return error;
1418 }
1419
1420 static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
1421                                        struct ext4_inode *raw_inode,
1422                                        int isize_diff, size_t ifree,
1423                                        size_t bfree, int *total_ino)
1424 {
1425         struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
1426         struct ext4_xattr_entry *small_entry;
1427         struct ext4_xattr_entry *entry;
1428         struct ext4_xattr_entry *last;
1429         unsigned int entry_size;        /* EA entry size */
1430         unsigned int total_size;        /* EA entry size + value size */
1431         unsigned int min_total_size;
1432         int error;
1433
1434         while (isize_diff > ifree) {
1435                 entry = NULL;
1436                 small_entry = NULL;
1437                 min_total_size = ~0U;
1438                 last = IFIRST(header);
1439                 /* Find the entry best suited to be pushed into EA block */
1440                 for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1441                         total_size =
1442                         EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
1443                                         EXT4_XATTR_LEN(last->e_name_len);
1444                         if (total_size <= bfree &&
1445                             total_size < min_total_size) {
1446                                 if (total_size + ifree < isize_diff) {
1447                                         small_entry = last;
1448                                 } else {
1449                                         entry = last;
1450                                         min_total_size = total_size;
1451                                 }
1452                         }
1453                 }
1454
1455                 if (entry == NULL) {
1456                         if (small_entry == NULL)
1457                                 return -ENOSPC;
1458                         entry = small_entry;
1459                 }
1460
1461                 entry_size = EXT4_XATTR_LEN(entry->e_name_len);
1462                 total_size = entry_size +
1463                         EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
1464                 error = ext4_xattr_move_to_block(handle, inode, raw_inode,
1465                                                  entry);
1466                 if (error)
1467                         return error;
1468
1469                 *total_ino -= entry_size;
1470                 ifree += total_size;
1471                 bfree -= total_size;
1472         }
1473
1474         return 0;
1475 }
1476
1477 /*
1478  * Grow an inode's i_extra_isize to new_extra_isize bytes when EAs are present.
1479  * Returns 0 on success or negative error number on failure.
1480  */
1481 int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
1482                                struct ext4_inode *raw_inode, handle_t *handle)
1483 {
1484         struct ext4_xattr_ibody_header *header;
1485         struct buffer_head *bh = NULL;
1486         size_t min_offs;
1487         size_t ifree, bfree;
1488         int total_ino;
1489         void *base, *end;
1490         int error = 0, tried_min_extra_isize = 0;
1491         int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
1492         int isize_diff; /* How much do we need to grow i_extra_isize */
1493
1494         down_write(&EXT4_I(inode)->xattr_sem);
1495         /*
1496          * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
1497          */
1498         ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
1499 retry:
1500         isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
1501         if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
1502                 goto out;
1503
1504         header = IHDR(inode, raw_inode);
1505
1506         /*
1507          * Check if enough free space is available in the inode to shift the
1508          * entries ahead by new_extra_isize.
1509          */
1510
1511         base = IFIRST(header);
1512         end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
1513         min_offs = end - base;
1514         total_ino = sizeof(struct ext4_xattr_ibody_header);
1515
1516         error = xattr_check_inode(inode, header, end);
1517         if (error)
1518                 goto cleanup;
1519
1520         ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino);
1521         if (ifree >= isize_diff)
1522                 goto shift;
1523
1524         /*
1525          * Enough free space isn't available in the inode, check if
1526          * EA block can hold new_extra_isize bytes.
1527          */
1528         if (EXT4_I(inode)->i_file_acl) {
1529                 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1530                 error = -EIO;
1531                 if (!bh)
1532                         goto cleanup;
1533                 if (ext4_xattr_check_block(inode, bh)) {
1534                         EXT4_ERROR_INODE(inode, "bad block %llu",
1535                                          EXT4_I(inode)->i_file_acl);
1536                         error = -EFSCORRUPTED;
1537                         goto cleanup;
1538                 }
1539                 base = BHDR(bh);
1540                 end = bh->b_data + bh->b_size;
1541                 min_offs = end - base;
1542                 bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base,
1543                                               NULL);
1544                 if (bfree + ifree < isize_diff) {
1545                         if (!tried_min_extra_isize && s_min_extra_isize) {
1546                                 tried_min_extra_isize++;
1547                                 new_extra_isize = s_min_extra_isize;
1548                                 brelse(bh);
1549                                 goto retry;
1550                         }
1551                         error = -ENOSPC;
1552                         goto cleanup;
1553                 }
1554         } else {
1555                 bfree = inode->i_sb->s_blocksize;
1556         }
1557
1558         error = ext4_xattr_make_inode_space(handle, inode, raw_inode,
1559                                             isize_diff, ifree, bfree,
1560                                             &total_ino);
1561         if (error) {
1562                 if (error == -ENOSPC && !tried_min_extra_isize &&
1563                     s_min_extra_isize) {
1564                         tried_min_extra_isize++;
1565                         new_extra_isize = s_min_extra_isize;
1566                         brelse(bh);
1567                         goto retry;
1568                 }
1569                 goto cleanup;
1570         }
1571 shift:
1572         /* Adjust the offsets and shift the remaining entries ahead */
1573         ext4_xattr_shift_entries(IFIRST(header), EXT4_I(inode)->i_extra_isize
1574                         - new_extra_isize, (void *)raw_inode +
1575                         EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
1576                         (void *)header, total_ino);
1577         EXT4_I(inode)->i_extra_isize = new_extra_isize;
1578         brelse(bh);
1579 out:
1580         ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1581         up_write(&EXT4_I(inode)->xattr_sem);
1582         return 0;
1583
1584 cleanup:
1585         brelse(bh);
1586         /*
1587          * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
1588          * size expansion failed.
1589          */
1590         up_write(&EXT4_I(inode)->xattr_sem);
1591         return error;
1592 }
1593
1594
1595
1596 /*
1597  * ext4_xattr_delete_inode()
1598  *
1599  * Free extended attribute resources associated with this inode. This
1600  * is called immediately before an inode is freed. We have exclusive
1601  * access to the inode.
1602  */
1603 void
1604 ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1605 {
1606         struct buffer_head *bh = NULL;
1607
1608         if (!EXT4_I(inode)->i_file_acl)
1609                 goto cleanup;
1610         bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1611         if (!bh) {
1612                 EXT4_ERROR_INODE(inode, "block %llu read error",
1613                                  EXT4_I(inode)->i_file_acl);
1614                 goto cleanup;
1615         }
1616         if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1617             BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1618                 EXT4_ERROR_INODE(inode, "bad block %llu",
1619                                  EXT4_I(inode)->i_file_acl);
1620                 goto cleanup;
1621         }
1622         ext4_xattr_release_block(handle, inode, bh);
1623         EXT4_I(inode)->i_file_acl = 0;
1624
1625 cleanup:
1626         brelse(bh);
1627 }
1628
1629 /*
1630  * ext4_xattr_cache_insert()
1631  *
1632  * Create a new entry in the extended attribute cache, and insert
1633  * it unless such an entry is already in the cache.
1634  *
1635  * Returns 0, or a negative error number on failure.
1636  */
1637 static void
1638 ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
1639 {
1640         struct ext4_xattr_header *header = BHDR(bh);
1641         __u32 hash = le32_to_cpu(header->h_hash);
1642         int reusable = le32_to_cpu(header->h_refcount) <
1643                        EXT4_XATTR_REFCOUNT_MAX;
1644         int error;
1645
1646         error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
1647                                       bh->b_blocknr, reusable);
1648         if (error) {
1649                 if (error == -EBUSY)
1650                         ea_bdebug(bh, "already in cache");
1651         } else
1652                 ea_bdebug(bh, "inserting [%x]", (int)hash);
1653 }
1654
1655 /*
1656  * ext4_xattr_cmp()
1657  *
1658  * Compare two extended attribute blocks for equality.
1659  *
1660  * Returns 0 if the blocks are equal, 1 if they differ, and
1661  * a negative error number on errors.
1662  */
1663 static int
1664 ext4_xattr_cmp(struct ext4_xattr_header *header1,
1665                struct ext4_xattr_header *header2)
1666 {
1667         struct ext4_xattr_entry *entry1, *entry2;
1668
1669         entry1 = ENTRY(header1+1);
1670         entry2 = ENTRY(header2+1);
1671         while (!IS_LAST_ENTRY(entry1)) {
1672                 if (IS_LAST_ENTRY(entry2))
1673                         return 1;
1674                 if (entry1->e_hash != entry2->e_hash ||
1675                     entry1->e_name_index != entry2->e_name_index ||
1676                     entry1->e_name_len != entry2->e_name_len ||
1677                     entry1->e_value_size != entry2->e_value_size ||
1678                     memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1679                         return 1;
1680                 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1681                         return -EFSCORRUPTED;
1682                 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1683                            (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1684                            le32_to_cpu(entry1->e_value_size)))
1685                         return 1;
1686
1687                 entry1 = EXT4_XATTR_NEXT(entry1);
1688                 entry2 = EXT4_XATTR_NEXT(entry2);
1689         }
1690         if (!IS_LAST_ENTRY(entry2))
1691                 return 1;
1692         return 0;
1693 }
1694
1695 /*
1696  * ext4_xattr_cache_find()
1697  *
1698  * Find an identical extended attribute block.
1699  *
1700  * Returns a pointer to the block found, or NULL if such a block was
1701  * not found or an error occurred.
1702  */
1703 static struct buffer_head *
1704 ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1705                       struct mb_cache_entry **pce)
1706 {
1707         __u32 hash = le32_to_cpu(header->h_hash);
1708         struct mb_cache_entry *ce;
1709         struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
1710
1711         if (!header->h_hash)
1712                 return NULL;  /* never share */
1713         ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1714         ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
1715         while (ce) {
1716                 struct buffer_head *bh;
1717
1718                 bh = sb_bread(inode->i_sb, ce->e_block);
1719                 if (!bh) {
1720                         EXT4_ERROR_INODE(inode, "block %lu read error",
1721                                          (unsigned long) ce->e_block);
1722                 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
1723                         *pce = ce;
1724                         return bh;
1725                 }
1726                 brelse(bh);
1727                 ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
1728         }
1729         return NULL;
1730 }
1731
1732 #define NAME_HASH_SHIFT 5
1733 #define VALUE_HASH_SHIFT 16
1734
1735 /*
1736  * ext4_xattr_hash_entry()
1737  *
1738  * Compute the hash of an extended attribute.
1739  */
1740 static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1741                                          struct ext4_xattr_entry *entry)
1742 {
1743         __u32 hash = 0;
1744         char *name = entry->e_name;
1745         int n;
1746
1747         for (n = 0; n < entry->e_name_len; n++) {
1748                 hash = (hash << NAME_HASH_SHIFT) ^
1749                        (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1750                        *name++;
1751         }
1752
1753         if (entry->e_value_size != 0) {
1754                 __le32 *value = (__le32 *)((char *)header +
1755                         le16_to_cpu(entry->e_value_offs));
1756                 for (n = (le32_to_cpu(entry->e_value_size) +
1757                      EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
1758                         hash = (hash << VALUE_HASH_SHIFT) ^
1759                                (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1760                                le32_to_cpu(*value++);
1761                 }
1762         }
1763         entry->e_hash = cpu_to_le32(hash);
1764 }
1765
1766 #undef NAME_HASH_SHIFT
1767 #undef VALUE_HASH_SHIFT
1768
1769 #define BLOCK_HASH_SHIFT 16
1770
1771 /*
1772  * ext4_xattr_rehash()
1773  *
1774  * Re-compute the extended attribute hash value after an entry has changed.
1775  */
1776 static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1777                               struct ext4_xattr_entry *entry)
1778 {
1779         struct ext4_xattr_entry *here;
1780         __u32 hash = 0;
1781
1782         ext4_xattr_hash_entry(header, entry);
1783         here = ENTRY(header+1);
1784         while (!IS_LAST_ENTRY(here)) {
1785                 if (!here->e_hash) {
1786                         /* Block is not shared if an entry's hash value == 0 */
1787                         hash = 0;
1788                         break;
1789                 }
1790                 hash = (hash << BLOCK_HASH_SHIFT) ^
1791                        (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1792                        le32_to_cpu(here->e_hash);
1793                 here = EXT4_XATTR_NEXT(here);
1794         }
1795         header->h_hash = cpu_to_le32(hash);
1796 }
1797
1798 #undef BLOCK_HASH_SHIFT
1799
#define HASH_BUCKET_BITS	10

/*
 * ext4_xattr_create_cache()
 *
 * Create the cache used for extended attribute blocks by calling
 * mb_cache_create() with HASH_BUCKET_BITS, and return whatever that
 * call returns.
 */
struct mb_cache *
ext4_xattr_create_cache(void)
{
	struct mb_cache *cache = mb_cache_create(HASH_BUCKET_BITS);

	return cache;
}
1807
/*
 * ext4_xattr_destroy_cache()
 *
 * Destroy an extended attribute block cache.  A NULL @cache is accepted
 * and ignored.
 */
void ext4_xattr_destroy_cache(struct mb_cache *cache)
{
	if (!cache)
		return;

	mb_cache_destroy(cache);
}
1813