Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszer...
[cascardo/linux.git] / fs / overlayfs / super.c
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9
10 #include <linux/fs.h>
11 #include <linux/namei.h>
12 #include <linux/pagemap.h>
13 #include <linux/xattr.h>
14 #include <linux/security.h>
15 #include <linux/mount.h>
16 #include <linux/slab.h>
17 #include <linux/parser.h>
18 #include <linux/module.h>
19 #include <linux/sched.h>
20 #include <linux/statfs.h>
21 #include <linux/seq_file.h>
22 #include "overlayfs.h"
23
24 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
25 MODULE_DESCRIPTION("Overlay filesystem");
26 MODULE_LICENSE("GPL");
27
28 struct ovl_config {
29         char *lowerdir;
30         char *upperdir;
31         char *workdir;
32         bool default_permissions;
33 };
34
35 /* private information held for overlayfs's superblock */
36 struct ovl_fs {
37         struct vfsmount *upper_mnt;
38         unsigned numlower;
39         struct vfsmount **lower_mnt;
40         struct dentry *workdir;
41         long lower_namelen;
42         /* pathnames of lower and upper dirs, for show_options */
43         struct ovl_config config;
44 };
45
46 struct ovl_dir_cache;
47
48 /* private information held for every overlayfs dentry */
49 struct ovl_entry {
50         struct dentry *__upperdentry;
51         struct ovl_dir_cache *cache;
52         union {
53                 struct {
54                         u64 version;
55                         bool opaque;
56                 };
57                 struct rcu_head rcu;
58         };
59         unsigned numlower;
60         struct path lowerstack[];
61 };
62
63 #define OVL_MAX_STACK 500
64
65 static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
66 {
67         return oe->numlower ? oe->lowerstack[0].dentry : NULL;
68 }
69
70 enum ovl_path_type ovl_path_type(struct dentry *dentry)
71 {
72         struct ovl_entry *oe = dentry->d_fsdata;
73         enum ovl_path_type type = 0;
74
75         if (oe->__upperdentry) {
76                 type = __OVL_PATH_UPPER;
77
78                 if (oe->numlower) {
79                         if (S_ISDIR(dentry->d_inode->i_mode))
80                                 type |= __OVL_PATH_MERGE;
81                 } else if (!oe->opaque) {
82                         type |= __OVL_PATH_PURE;
83                 }
84         } else {
85                 if (oe->numlower > 1)
86                         type |= __OVL_PATH_MERGE;
87         }
88         return type;
89 }
90
91 static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
92 {
93         return lockless_dereference(oe->__upperdentry);
94 }
95
96 void ovl_path_upper(struct dentry *dentry, struct path *path)
97 {
98         struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
99         struct ovl_entry *oe = dentry->d_fsdata;
100
101         path->mnt = ofs->upper_mnt;
102         path->dentry = ovl_upperdentry_dereference(oe);
103 }
104
105 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
106 {
107         enum ovl_path_type type = ovl_path_type(dentry);
108
109         if (!OVL_TYPE_UPPER(type))
110                 ovl_path_lower(dentry, path);
111         else
112                 ovl_path_upper(dentry, path);
113
114         return type;
115 }
116
117 struct dentry *ovl_dentry_upper(struct dentry *dentry)
118 {
119         struct ovl_entry *oe = dentry->d_fsdata;
120
121         return ovl_upperdentry_dereference(oe);
122 }
123
124 struct dentry *ovl_dentry_lower(struct dentry *dentry)
125 {
126         struct ovl_entry *oe = dentry->d_fsdata;
127
128         return __ovl_dentry_lower(oe);
129 }
130
131 struct dentry *ovl_dentry_real(struct dentry *dentry)
132 {
133         struct ovl_entry *oe = dentry->d_fsdata;
134         struct dentry *realdentry;
135
136         realdentry = ovl_upperdentry_dereference(oe);
137         if (!realdentry)
138                 realdentry = __ovl_dentry_lower(oe);
139
140         return realdentry;
141 }
142
143 struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
144 {
145         struct dentry *realdentry;
146
147         realdentry = ovl_upperdentry_dereference(oe);
148         if (realdentry) {
149                 *is_upper = true;
150         } else {
151                 realdentry = __ovl_dentry_lower(oe);
152                 *is_upper = false;
153         }
154         return realdentry;
155 }
156
157 struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode,
158                                     bool is_upper)
159 {
160         if (is_upper) {
161                 struct ovl_fs *ofs = inode->i_sb->s_fs_info;
162
163                 return ofs->upper_mnt;
164         } else {
165                 return oe->numlower ? oe->lowerstack[0].mnt : NULL;
166         }
167 }
168
169 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
170 {
171         struct ovl_entry *oe = dentry->d_fsdata;
172
173         return oe->cache;
174 }
175
176 bool ovl_is_default_permissions(struct inode *inode)
177 {
178         struct ovl_fs *ofs = inode->i_sb->s_fs_info;
179
180         return ofs->config.default_permissions;
181 }
182
183 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
184 {
185         struct ovl_entry *oe = dentry->d_fsdata;
186
187         oe->cache = cache;
188 }
189
190 void ovl_path_lower(struct dentry *dentry, struct path *path)
191 {
192         struct ovl_entry *oe = dentry->d_fsdata;
193
194         *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL };
195 }
196
197 int ovl_want_write(struct dentry *dentry)
198 {
199         struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
200         return mnt_want_write(ofs->upper_mnt);
201 }
202
203 void ovl_drop_write(struct dentry *dentry)
204 {
205         struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
206         mnt_drop_write(ofs->upper_mnt);
207 }
208
209 struct dentry *ovl_workdir(struct dentry *dentry)
210 {
211         struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
212         return ofs->workdir;
213 }
214
215 bool ovl_dentry_is_opaque(struct dentry *dentry)
216 {
217         struct ovl_entry *oe = dentry->d_fsdata;
218         return oe->opaque;
219 }
220
221 void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
222 {
223         struct ovl_entry *oe = dentry->d_fsdata;
224         oe->opaque = opaque;
225 }
226
227 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
228 {
229         struct ovl_entry *oe = dentry->d_fsdata;
230
231         WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
232         WARN_ON(oe->__upperdentry);
233         BUG_ON(!upperdentry->d_inode);
234         /*
235          * Make sure upperdentry is consistent before making it visible to
236          * ovl_upperdentry_dereference().
237          */
238         smp_wmb();
239         oe->__upperdentry = upperdentry;
240 }
241
242 void ovl_dentry_version_inc(struct dentry *dentry)
243 {
244         struct ovl_entry *oe = dentry->d_fsdata;
245
246         WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
247         oe->version++;
248 }
249
250 u64 ovl_dentry_version_get(struct dentry *dentry)
251 {
252         struct ovl_entry *oe = dentry->d_fsdata;
253
254         WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
255         return oe->version;
256 }
257
258 bool ovl_is_whiteout(struct dentry *dentry)
259 {
260         struct inode *inode = dentry->d_inode;
261
262         return inode && IS_WHITEOUT(inode);
263 }
264
265 static bool ovl_is_opaquedir(struct dentry *dentry)
266 {
267         int res;
268         char val;
269         struct inode *inode = dentry->d_inode;
270
271         if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr)
272                 return false;
273
274         res = inode->i_op->getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
275         if (res == 1 && val == 'y')
276                 return true;
277
278         return false;
279 }
280
281 static void ovl_dentry_release(struct dentry *dentry)
282 {
283         struct ovl_entry *oe = dentry->d_fsdata;
284
285         if (oe) {
286                 unsigned int i;
287
288                 dput(oe->__upperdentry);
289                 for (i = 0; i < oe->numlower; i++)
290                         dput(oe->lowerstack[i].dentry);
291                 kfree_rcu(oe, rcu);
292         }
293 }
294
295 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
296 {
297         struct ovl_entry *oe = dentry->d_fsdata;
298         unsigned int i;
299         int ret = 1;
300
301         for (i = 0; i < oe->numlower; i++) {
302                 struct dentry *d = oe->lowerstack[i].dentry;
303
304                 if (d->d_flags & DCACHE_OP_REVALIDATE) {
305                         ret = d->d_op->d_revalidate(d, flags);
306                         if (ret < 0)
307                                 return ret;
308                         if (!ret) {
309                                 if (!(flags & LOOKUP_RCU))
310                                         d_invalidate(d);
311                                 return -ESTALE;
312                         }
313                 }
314         }
315         return 1;
316 }
317
318 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
319 {
320         struct ovl_entry *oe = dentry->d_fsdata;
321         unsigned int i;
322         int ret = 1;
323
324         for (i = 0; i < oe->numlower; i++) {
325                 struct dentry *d = oe->lowerstack[i].dentry;
326
327                 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
328                         ret = d->d_op->d_weak_revalidate(d, flags);
329                         if (ret <= 0)
330                                 break;
331                 }
332         }
333         return ret;
334 }
335
336 static const struct dentry_operations ovl_dentry_operations = {
337         .d_release = ovl_dentry_release,
338         .d_select_inode = ovl_d_select_inode,
339 };
340
341 static const struct dentry_operations ovl_reval_dentry_operations = {
342         .d_release = ovl_dentry_release,
343         .d_revalidate = ovl_dentry_revalidate,
344         .d_weak_revalidate = ovl_dentry_weak_revalidate,
345 };
346
347 static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
348 {
349         size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
350         struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
351
352         if (oe)
353                 oe->numlower = numlower;
354
355         return oe;
356 }
357
358 static bool ovl_dentry_remote(struct dentry *dentry)
359 {
360         return dentry->d_flags &
361                 (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
362 }
363
364 static bool ovl_dentry_weird(struct dentry *dentry)
365 {
366         return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
367                                   DCACHE_MANAGE_TRANSIT |
368                                   DCACHE_OP_HASH |
369                                   DCACHE_OP_COMPARE);
370 }
371
372 static inline struct dentry *ovl_lookup_real(struct dentry *dir,
373                                              struct qstr *name)
374 {
375         struct dentry *dentry;
376
377         mutex_lock(&dir->d_inode->i_mutex);
378         dentry = lookup_one_len(name->name, dir, name->len);
379         mutex_unlock(&dir->d_inode->i_mutex);
380
381         if (IS_ERR(dentry)) {
382                 if (PTR_ERR(dentry) == -ENOENT)
383                         dentry = NULL;
384         } else if (!dentry->d_inode) {
385                 dput(dentry);
386                 dentry = NULL;
387         } else if (ovl_dentry_weird(dentry)) {
388                 dput(dentry);
389                 /* Don't support traversing automounts and other weirdness */
390                 dentry = ERR_PTR(-EREMOTE);
391         }
392         return dentry;
393 }
394
395 /*
396  * Returns next layer in stack starting from top.
397  * Returns -1 if this is the last layer.
398  */
399 int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
400 {
401         struct ovl_entry *oe = dentry->d_fsdata;
402
403         BUG_ON(idx < 0);
404         if (idx == 0) {
405                 ovl_path_upper(dentry, path);
406                 if (path->dentry)
407                         return oe->numlower ? 1 : -1;
408                 idx++;
409         }
410         BUG_ON(idx > oe->numlower);
411         *path = oe->lowerstack[idx - 1];
412
413         return (idx < oe->numlower) ? idx + 1 : -1;
414 }
415
416 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
417                           unsigned int flags)
418 {
419         struct ovl_entry *oe;
420         struct ovl_entry *poe = dentry->d_parent->d_fsdata;
421         struct path *stack = NULL;
422         struct dentry *upperdir, *upperdentry = NULL;
423         unsigned int ctr = 0;
424         struct inode *inode = NULL;
425         bool upperopaque = false;
426         struct dentry *this, *prev = NULL;
427         unsigned int i;
428         int err;
429
430         upperdir = ovl_upperdentry_dereference(poe);
431         if (upperdir) {
432                 this = ovl_lookup_real(upperdir, &dentry->d_name);
433                 err = PTR_ERR(this);
434                 if (IS_ERR(this))
435                         goto out;
436
437                 if (this) {
438                         if (unlikely(ovl_dentry_remote(this))) {
439                                 dput(this);
440                                 err = -EREMOTE;
441                                 goto out;
442                         }
443                         if (ovl_is_whiteout(this)) {
444                                 dput(this);
445                                 this = NULL;
446                                 upperopaque = true;
447                         } else if (poe->numlower && ovl_is_opaquedir(this)) {
448                                 upperopaque = true;
449                         }
450                 }
451                 upperdentry = prev = this;
452         }
453
454         if (!upperopaque && poe->numlower) {
455                 err = -ENOMEM;
456                 stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL);
457                 if (!stack)
458                         goto out_put_upper;
459         }
460
461         for (i = 0; !upperopaque && i < poe->numlower; i++) {
462                 bool opaque = false;
463                 struct path lowerpath = poe->lowerstack[i];
464
465                 this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name);
466                 err = PTR_ERR(this);
467                 if (IS_ERR(this)) {
468                         /*
469                          * If it's positive, then treat ENAMETOOLONG as ENOENT.
470                          */
471                         if (err == -ENAMETOOLONG && (upperdentry || ctr))
472                                 continue;
473                         goto out_put;
474                 }
475                 if (!this)
476                         continue;
477                 if (ovl_is_whiteout(this)) {
478                         dput(this);
479                         break;
480                 }
481                 /*
482                  * Only makes sense to check opaque dir if this is not the
483                  * lowermost layer.
484                  */
485                 if (i < poe->numlower - 1 && ovl_is_opaquedir(this))
486                         opaque = true;
487
488                 if (prev && (!S_ISDIR(prev->d_inode->i_mode) ||
489                              !S_ISDIR(this->d_inode->i_mode))) {
490                         /*
491                          * FIXME: check for upper-opaqueness maybe better done
492                          * in remove code.
493                          */
494                         if (prev == upperdentry)
495                                 upperopaque = true;
496                         dput(this);
497                         break;
498                 }
499                 /*
500                  * If this is a non-directory then stop here.
501                  */
502                 if (!S_ISDIR(this->d_inode->i_mode))
503                         opaque = true;
504
505                 stack[ctr].dentry = this;
506                 stack[ctr].mnt = lowerpath.mnt;
507                 ctr++;
508                 prev = this;
509                 if (opaque)
510                         break;
511         }
512
513         oe = ovl_alloc_entry(ctr);
514         err = -ENOMEM;
515         if (!oe)
516                 goto out_put;
517
518         if (upperdentry || ctr) {
519                 struct dentry *realdentry;
520
521                 realdentry = upperdentry ? upperdentry : stack[0].dentry;
522
523                 err = -ENOMEM;
524                 inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
525                                       oe);
526                 if (!inode)
527                         goto out_free_oe;
528                 ovl_copyattr(realdentry->d_inode, inode);
529         }
530
531         oe->opaque = upperopaque;
532         oe->__upperdentry = upperdentry;
533         memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
534         kfree(stack);
535         dentry->d_fsdata = oe;
536         d_add(dentry, inode);
537
538         return NULL;
539
540 out_free_oe:
541         kfree(oe);
542 out_put:
543         for (i = 0; i < ctr; i++)
544                 dput(stack[i].dentry);
545         kfree(stack);
546 out_put_upper:
547         dput(upperdentry);
548 out:
549         return ERR_PTR(err);
550 }
551
/* Open @path with @flags using the credentials of the current task */
struct file *ovl_path_open(struct path *path, int flags)
{
	const struct cred *cred = current_cred();

	return dentry_open(path, flags, cred);
}
556
557 static void ovl_put_super(struct super_block *sb)
558 {
559         struct ovl_fs *ufs = sb->s_fs_info;
560         unsigned i;
561
562         dput(ufs->workdir);
563         mntput(ufs->upper_mnt);
564         for (i = 0; i < ufs->numlower; i++)
565                 mntput(ufs->lower_mnt[i]);
566         kfree(ufs->lower_mnt);
567
568         kfree(ufs->config.lowerdir);
569         kfree(ufs->config.upperdir);
570         kfree(ufs->config.workdir);
571         kfree(ufs);
572 }
573
574 /**
575  * ovl_statfs
576  * @sb: The overlayfs super block
577  * @buf: The struct kstatfs to fill in with stats
578  *
579  * Get the filesystem statistics.  As writes always target the upper layer
580  * filesystem pass the statfs to the upper filesystem (if it exists)
581  */
582 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
583 {
584         struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
585         struct dentry *root_dentry = dentry->d_sb->s_root;
586         struct path path;
587         int err;
588
589         ovl_path_real(root_dentry, &path);
590
591         err = vfs_statfs(&path, buf);
592         if (!err) {
593                 buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
594                 buf->f_type = OVERLAYFS_SUPER_MAGIC;
595         }
596
597         return err;
598 }
599
600 /**
601  * ovl_show_options
602  *
603  * Prints the mount options for a given superblock.
604  * Returns zero; does not fail.
605  */
606 static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
607 {
608         struct super_block *sb = dentry->d_sb;
609         struct ovl_fs *ufs = sb->s_fs_info;
610
611         seq_show_option(m, "lowerdir", ufs->config.lowerdir);
612         if (ufs->config.upperdir) {
613                 seq_show_option(m, "upperdir", ufs->config.upperdir);
614                 seq_show_option(m, "workdir", ufs->config.workdir);
615         }
616         if (ufs->config.default_permissions)
617                 seq_puts(m, ",default_permissions");
618         return 0;
619 }
620
621 static int ovl_remount(struct super_block *sb, int *flags, char *data)
622 {
623         struct ovl_fs *ufs = sb->s_fs_info;
624
625         if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
626                 return -EROFS;
627
628         return 0;
629 }
630
631 static const struct super_operations ovl_super_operations = {
632         .put_super      = ovl_put_super,
633         .statfs         = ovl_statfs,
634         .show_options   = ovl_show_options,
635         .remount_fs     = ovl_remount,
636 };
637
/* Token values used in ovl_tokens and handled by ovl_parse_opt() */
enum {
	OPT_LOWERDIR,			/* "lowerdir=%s" */
	OPT_UPPERDIR,			/* "upperdir=%s" */
	OPT_WORKDIR,			/* "workdir=%s" */
	OPT_DEFAULT_PERMISSIONS,	/* "default_permissions" */
	OPT_ERR,			/* table terminator */
};
645
646 static const match_table_t ovl_tokens = {
647         {OPT_LOWERDIR,                  "lowerdir=%s"},
648         {OPT_UPPERDIR,                  "upperdir=%s"},
649         {OPT_WORKDIR,                   "workdir=%s"},
650         {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
651         {OPT_ERR,                       NULL}
652 };
653
/*
 * Split the next option off the comma separated string *@s.
 *
 * A comma preceded by a backslash does not separate options; the backslash
 * itself is left in place.  The separating comma is overwritten with a NUL
 * and *@s is advanced past it, or set to NULL when the last option has been
 * returned.  Returns the start of the option, or NULL if *@s is NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		if (*cur == '\\') {
			/* skip the escaped character, unless the string ends */
			cur++;
			if (!*cur)
				break;
		}
		cur++;
	}

	*s = NULL;
	return start;
}
676
677 static int ovl_parse_opt(char *opt, struct ovl_config *config)
678 {
679         char *p;
680
681         while ((p = ovl_next_opt(&opt)) != NULL) {
682                 int token;
683                 substring_t args[MAX_OPT_ARGS];
684
685                 if (!*p)
686                         continue;
687
688                 token = match_token(p, ovl_tokens, args);
689                 switch (token) {
690                 case OPT_UPPERDIR:
691                         kfree(config->upperdir);
692                         config->upperdir = match_strdup(&args[0]);
693                         if (!config->upperdir)
694                                 return -ENOMEM;
695                         break;
696
697                 case OPT_LOWERDIR:
698                         kfree(config->lowerdir);
699                         config->lowerdir = match_strdup(&args[0]);
700                         if (!config->lowerdir)
701                                 return -ENOMEM;
702                         break;
703
704                 case OPT_WORKDIR:
705                         kfree(config->workdir);
706                         config->workdir = match_strdup(&args[0]);
707                         if (!config->workdir)
708                                 return -ENOMEM;
709                         break;
710
711                 case OPT_DEFAULT_PERMISSIONS:
712                         config->default_permissions = true;
713                         break;
714
715                 default:
716                         pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
717                         return -EINVAL;
718                 }
719         }
720
721         /* Workdir is useless in non-upper mount */
722         if (!config->upperdir && config->workdir) {
723                 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
724                         config->workdir);
725                 kfree(config->workdir);
726                 config->workdir = NULL;
727         }
728
729         return 0;
730 }
731
732 #define OVL_WORKDIR_NAME "work"
733
734 static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
735                                          struct dentry *dentry)
736 {
737         struct inode *dir = dentry->d_inode;
738         struct dentry *work;
739         int err;
740         bool retried = false;
741
742         err = mnt_want_write(mnt);
743         if (err)
744                 return ERR_PTR(err);
745
746         mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
747 retry:
748         work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
749                               strlen(OVL_WORKDIR_NAME));
750
751         if (!IS_ERR(work)) {
752                 struct kstat stat = {
753                         .mode = S_IFDIR | 0,
754                 };
755
756                 if (work->d_inode) {
757                         err = -EEXIST;
758                         if (retried)
759                                 goto out_dput;
760
761                         retried = true;
762                         ovl_cleanup(dir, work);
763                         dput(work);
764                         goto retry;
765                 }
766
767                 err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
768                 if (err)
769                         goto out_dput;
770         }
771 out_unlock:
772         mutex_unlock(&dir->i_mutex);
773         mnt_drop_write(mnt);
774
775         return work;
776
777 out_dput:
778         dput(work);
779         work = ERR_PTR(err);
780         goto out_unlock;
781 }
782
/*
 * Remove backslash escapes from @s in place: each backslash is dropped and
 * the character following it is kept literally.  A backslash at the very
 * end of the string is simply dropped.
 */
static void ovl_unescape(char *s)
{
	char *out = s;
	char c;

	do {
		if (*s == '\\')
			s++;
		c = *s++;
		*out++ = c;
	} while (c);
}
795
796 static int ovl_mount_dir_noesc(const char *name, struct path *path)
797 {
798         int err = -EINVAL;
799
800         if (!*name) {
801                 pr_err("overlayfs: empty lowerdir\n");
802                 goto out;
803         }
804         err = kern_path(name, LOOKUP_FOLLOW, path);
805         if (err) {
806                 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
807                 goto out;
808         }
809         err = -EINVAL;
810         if (ovl_dentry_weird(path->dentry)) {
811                 pr_err("overlayfs: filesystem on '%s' not supported\n", name);
812                 goto out_put;
813         }
814         if (!S_ISDIR(path->dentry->d_inode->i_mode)) {
815                 pr_err("overlayfs: '%s' not a directory\n", name);
816                 goto out_put;
817         }
818         return 0;
819
820 out_put:
821         path_put(path);
822 out:
823         return err;
824 }
825
826 static int ovl_mount_dir(const char *name, struct path *path)
827 {
828         int err = -ENOMEM;
829         char *tmp = kstrdup(name, GFP_KERNEL);
830
831         if (tmp) {
832                 ovl_unescape(tmp);
833                 err = ovl_mount_dir_noesc(tmp, path);
834
835                 if (!err)
836                         if (ovl_dentry_remote(path->dentry)) {
837                                 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
838                                        tmp);
839                                 path_put(path);
840                                 err = -EINVAL;
841                         }
842                 kfree(tmp);
843         }
844         return err;
845 }
846
847 static int ovl_lower_dir(const char *name, struct path *path, long *namelen,
848                          int *stack_depth, bool *remote)
849 {
850         int err;
851         struct kstatfs statfs;
852
853         err = ovl_mount_dir_noesc(name, path);
854         if (err)
855                 goto out;
856
857         err = vfs_statfs(path, &statfs);
858         if (err) {
859                 pr_err("overlayfs: statfs failed on '%s'\n", name);
860                 goto out_put;
861         }
862         *namelen = max(*namelen, statfs.f_namelen);
863         *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
864
865         if (ovl_dentry_remote(path->dentry))
866                 *remote = true;
867
868         return 0;
869
870 out_put:
871         path_put(path);
872 out:
873         return err;
874 }
875
876 /* Workdir should not be subdir of upperdir and vice versa */
877 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
878 {
879         bool ok = false;
880
881         if (workdir != upperdir) {
882                 ok = (lock_rename(workdir, upperdir) == NULL);
883                 unlock_rename(workdir, upperdir);
884         }
885         return ok;
886 }
887
/*
 * Split the colon separated list @str in place into consecutive NUL
 * terminated strings and return how many there are (always at least 1).
 *
 * Backslash escapes are removed while splitting: the character after a
 * backslash is copied literally, so "\:" yields a colon inside a name.
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *in = str;
	char *out = str;

	for (;;) {
		char c = *in++;

		if (c == ':') {
			*out++ = '\0';
			count++;
			continue;
		}
		if (c == '\\')
			c = *in++;
		*out++ = c;
		if (!c)
			break;
	}

	return count;
}
907
908 static int ovl_fill_super(struct super_block *sb, void *data, int silent)
909 {
910         struct path upperpath = { NULL, NULL };
911         struct path workpath = { NULL, NULL };
912         struct dentry *root_dentry;
913         struct ovl_entry *oe;
914         struct ovl_fs *ufs;
915         struct path *stack = NULL;
916         char *lowertmp;
917         char *lower;
918         unsigned int numlower;
919         unsigned int stacklen = 0;
920         unsigned int i;
921         bool remote = false;
922         int err;
923
924         err = -ENOMEM;
925         ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
926         if (!ufs)
927                 goto out;
928
929         err = ovl_parse_opt((char *) data, &ufs->config);
930         if (err)
931                 goto out_free_config;
932
933         err = -EINVAL;
934         if (!ufs->config.lowerdir) {
935                 pr_err("overlayfs: missing 'lowerdir'\n");
936                 goto out_free_config;
937         }
938
939         sb->s_stack_depth = 0;
940         sb->s_maxbytes = MAX_LFS_FILESIZE;
941         if (ufs->config.upperdir) {
942                 if (!ufs->config.workdir) {
943                         pr_err("overlayfs: missing 'workdir'\n");
944                         goto out_free_config;
945                 }
946
947                 err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
948                 if (err)
949                         goto out_free_config;
950
951                 /* Upper fs should not be r/o */
952                 if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
953                         pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
954                         err = -EINVAL;
955                         goto out_put_upperpath;
956                 }
957
958                 err = ovl_mount_dir(ufs->config.workdir, &workpath);
959                 if (err)
960                         goto out_put_upperpath;
961
962                 err = -EINVAL;
963                 if (upperpath.mnt != workpath.mnt) {
964                         pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
965                         goto out_put_workpath;
966                 }
967                 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
968                         pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
969                         goto out_put_workpath;
970                 }
971                 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
972         }
973         err = -ENOMEM;
974         lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
975         if (!lowertmp)
976                 goto out_put_workpath;
977
978         err = -EINVAL;
979         stacklen = ovl_split_lowerdirs(lowertmp);
980         if (stacklen > OVL_MAX_STACK) {
981                 pr_err("overlayfs: too many lower directries, limit is %d\n",
982                        OVL_MAX_STACK);
983                 goto out_free_lowertmp;
984         } else if (!ufs->config.upperdir && stacklen == 1) {
985                 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
986                 goto out_free_lowertmp;
987         }
988
989         stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
990         if (!stack)
991                 goto out_free_lowertmp;
992
993         lower = lowertmp;
994         for (numlower = 0; numlower < stacklen; numlower++) {
995                 err = ovl_lower_dir(lower, &stack[numlower],
996                                     &ufs->lower_namelen, &sb->s_stack_depth,
997                                     &remote);
998                 if (err)
999                         goto out_put_lowerpath;
1000
1001                 lower = strchr(lower, '\0') + 1;
1002         }
1003
1004         err = -EINVAL;
1005         sb->s_stack_depth++;
1006         if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1007                 pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1008                 goto out_put_lowerpath;
1009         }
1010
1011         if (ufs->config.upperdir) {
1012                 ufs->upper_mnt = clone_private_mount(&upperpath);
1013                 err = PTR_ERR(ufs->upper_mnt);
1014                 if (IS_ERR(ufs->upper_mnt)) {
1015                         pr_err("overlayfs: failed to clone upperpath\n");
1016                         goto out_put_lowerpath;
1017                 }
1018
1019                 ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
1020                 err = PTR_ERR(ufs->workdir);
1021                 if (IS_ERR(ufs->workdir)) {
1022                         pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
1023                                 ufs->config.workdir, OVL_WORKDIR_NAME, -err);
1024                         sb->s_flags |= MS_RDONLY;
1025                         ufs->workdir = NULL;
1026                 }
1027         }
1028
1029         err = -ENOMEM;
1030         ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL);
1031         if (ufs->lower_mnt == NULL)
1032                 goto out_put_workdir;
1033         for (i = 0; i < numlower; i++) {
1034                 struct vfsmount *mnt = clone_private_mount(&stack[i]);
1035
1036                 err = PTR_ERR(mnt);
1037                 if (IS_ERR(mnt)) {
1038                         pr_err("overlayfs: failed to clone lowerpath\n");
1039                         goto out_put_lower_mnt;
1040                 }
1041                 /*
1042                  * Make lower_mnt R/O.  That way fchmod/fchown on lower file
1043                  * will fail instead of modifying lower fs.
1044                  */
1045                 mnt->mnt_flags |= MNT_READONLY;
1046
1047                 ufs->lower_mnt[ufs->numlower] = mnt;
1048                 ufs->numlower++;
1049         }
1050
1051         /* If the upper fs is nonexistent, we mark overlayfs r/o too */
1052         if (!ufs->upper_mnt)
1053                 sb->s_flags |= MS_RDONLY;
1054
1055         if (remote)
1056                 sb->s_d_op = &ovl_reval_dentry_operations;
1057         else
1058                 sb->s_d_op = &ovl_dentry_operations;
1059
1060         err = -ENOMEM;
1061         oe = ovl_alloc_entry(numlower);
1062         if (!oe)
1063                 goto out_put_lower_mnt;
1064
1065         root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe));
1066         if (!root_dentry)
1067                 goto out_free_oe;
1068
1069         mntput(upperpath.mnt);
1070         for (i = 0; i < numlower; i++)
1071                 mntput(stack[i].mnt);
1072         path_put(&workpath);
1073         kfree(lowertmp);
1074
1075         oe->__upperdentry = upperpath.dentry;
1076         for (i = 0; i < numlower; i++) {
1077                 oe->lowerstack[i].dentry = stack[i].dentry;
1078                 oe->lowerstack[i].mnt = ufs->lower_mnt[i];
1079         }
1080         kfree(stack);
1081
1082         root_dentry->d_fsdata = oe;
1083
1084         ovl_copyattr(ovl_dentry_real(root_dentry)->d_inode,
1085                      root_dentry->d_inode);
1086
1087         sb->s_magic = OVERLAYFS_SUPER_MAGIC;
1088         sb->s_op = &ovl_super_operations;
1089         sb->s_root = root_dentry;
1090         sb->s_fs_info = ufs;
1091
1092         return 0;
1093
1094 out_free_oe:
1095         kfree(oe);
1096 out_put_lower_mnt:
1097         for (i = 0; i < ufs->numlower; i++)
1098                 mntput(ufs->lower_mnt[i]);
1099         kfree(ufs->lower_mnt);
1100 out_put_workdir:
1101         dput(ufs->workdir);
1102         mntput(ufs->upper_mnt);
1103 out_put_lowerpath:
1104         for (i = 0; i < numlower; i++)
1105                 path_put(&stack[i]);
1106         kfree(stack);
1107 out_free_lowertmp:
1108         kfree(lowertmp);
1109 out_put_workpath:
1110         path_put(&workpath);
1111 out_put_upperpath:
1112         path_put(&upperpath);
1113 out_free_config:
1114         kfree(ufs->config.lowerdir);
1115         kfree(ufs->config.upperdir);
1116         kfree(ufs->config.workdir);
1117         kfree(ufs);
1118 out:
1119         return err;
1120 }
1121
1122 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
1123                                 const char *dev_name, void *raw_data)
1124 {
1125         return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
1126 }
1127
1128 static struct file_system_type ovl_fs_type = {
1129         .owner          = THIS_MODULE,
1130         .name           = "overlay",
1131         .mount          = ovl_mount,
1132         .kill_sb        = kill_anon_super,
1133 };
1134 MODULE_ALIAS_FS("overlay");
1135
1136 static int __init ovl_init(void)
1137 {
1138         return register_filesystem(&ovl_fs_type);
1139 }
1140
1141 static void __exit ovl_exit(void)
1142 {
1143         unregister_filesystem(&ovl_fs_type);
1144 }
1145
/* Hook the filesystem (un)registration routines into module load/unload. */
module_init(ovl_init);
module_exit(ovl_exit);