X-Git-Url: http://git.cascardo.info/?p=cascardo%2Flinux.git;a=blobdiff_plain;f=drivers%2Fstaging%2Flustre%2Flustre%2Fllite%2Ffile.c;h=e1d784bae06494e0ae477cbd4818d1fea585b698;hp=23249b3e6cad1443efb4d9dbfde574bf26495480;hb=97d2116708ca0fd6ad8b00811ee4349b7e19e96f;hpb=e55f1d1d13e7f1c364672d667d78fd1f640ab9f9 diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 23249b3e6cad..e1d784bae064 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -38,14 +38,15 @@ #define DEBUG_SUBSYSTEM S_LLITE #include "../include/lustre_dlm.h" -#include "../include/lustre_lite.h" #include #include +#include #include -#include "llite_internal.h" #include "../include/lustre/ll_fiemap.h" +#include "../include/lustre/lustre_ioctl.h" #include "../include/cl_object.h" +#include "llite_internal.h" static int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg); @@ -188,17 +189,11 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, spin_unlock(&lli->lli_lock); } - if (rc == 0) { - rc = ll_objects_destroy(req, inode); - if (rc) - CERROR("inode %lu ll_objects destroy: rc = %d\n", - inode->i_ino, rc); - } if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) { struct mdt_body *body; body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!(body->valid & OBD_MD_FLRELEASED)) + if (!(body->mbo_valid & OBD_MD_FLRELEASED)) rc = -EBUSY; } @@ -349,13 +344,11 @@ int ll_file_release(struct inode *inode, struct file *file) fd = LUSTRE_FPRIVATE(file); LASSERT(fd); - /* The last ref on @file, maybe not be the owner pid of statahead. - * Different processes can open the same dir, "ll_opendir_key" means: - * it is me that should stop the statahead thread. + /* The last ref on @file, maybe not be the owner pid of statahead, + * because parent and child process can share the same file handle. */ - if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd && - lli->lli_opendir_pid != 0) - ll_stop_statahead(inode, lli->lli_opendir_key); + if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd) + ll_deauthorize_statahead(inode, fd); if (is_root_inode(inode)) { LUSTRE_FPRIVATE(file) = NULL; @@ -364,7 +357,8 @@ int ll_file_release(struct inode *inode, struct file *file) } if (!S_ISDIR(inode->i_mode)) { - lov_read_and_clear_async_rc(lli->lli_clob); + if (lli->lli_clob) + lov_read_and_clear_async_rc(lli->lli_clob); lli->lli_async_rc = 0; } @@ -376,55 +370,39 @@ int ll_file_release(struct inode *inode, struct file *file) return rc; } -static int ll_intent_file_open(struct dentry *dentry, void *lmm, - int lmmsize, struct lookup_intent *itp) +static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize, + struct lookup_intent *itp) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(de); struct ll_sb_info *sbi = ll_i2sbi(inode); - struct dentry *parent = dentry->d_parent; - const char *name = dentry->d_name.name; - const int len = dentry->d_name.len; + struct dentry *parent = de->d_parent; + const char *name = NULL; struct md_op_data *op_data; - struct ptlrpc_request *req; - __u32 opc = LUSTRE_OPC_ANY; - int rc; + struct ptlrpc_request *req = NULL; + int len = 0, rc; - /* Usually we come here only for NFSD, and we want open lock. */ - /* We can also get here if there was cached open handle in revalidate_it - * but it disappeared while we were getting from there to ll_file_open. - * But this means this file was closed and immediately opened which - * makes a good candidate for using OPEN lock - */ - /* If lmmsize & lmm are not 0, we are just setting stripe info - * parameters. No need for the open lock + LASSERT(parent); + LASSERT(itp->it_flags & MDS_OPEN_BY_FID); + + /* + * if server supports open-by-fid, or file name is invalid, don't pack + * name in open request */ - if (!lmm && lmmsize == 0) { - struct ll_dentry_data *ldd = ll_d2d(dentry); - /* - * If we came via ll_iget_for_nfs, then we need to request - * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry); - * - * NB: when ldd is NULL, it must have come via normal - * lookup path only, since ll_iget_for_nfs always calls - * ll_d_init(). - */ - if (ldd && ldd->lld_nfs_dentry) { - ldd->lld_nfs_dentry = 0; - itp->it_flags |= MDS_OPEN_LOCK; - } - if (itp->it_flags & FMODE_WRITE) - opc = LUSTRE_OPC_CREATE; + if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) && + lu_name_is_valid_2(de->d_name.name, de->d_name.len)) { + name = de->d_name.name; + len = de->d_name.len; } - op_data = ll_prep_md_op_data(NULL, d_inode(parent), - inode, name, len, - O_RDWR, opc, NULL); + op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len, + O_RDWR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); + op_data->op_data = lmm; + op_data->op_data_size = lmmsize; - itp->it_flags |= MDS_OPEN_BY_FID; - rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp, - 0 /*unused */, &req, ll_md_blocking_ast, 0); + rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req, + &ll_md_blocking_ast, 0); ll_finish_md_op_data(op_data); if (rc == -ESTALE) { /* reason for keep own exit path - don`t flood log @@ -479,8 +457,8 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, struct mdt_body *body; body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY); - och->och_fh = body->handle; - och->och_fid = body->fid1; + och->och_fh = body->mbo_handle; + och->och_fid = body->mbo_fid1; och->och_lease_handle.cookie = it->it_lock_handle; och->och_magic = OBD_CLIENT_HANDLE_MAGIC; och->och_flags = it->it_flags; @@ -508,7 +486,7 @@ static int ll_local_open(struct file *file, struct lookup_intent *it, body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY); - ll_ioepoch_open(lli, body->ioepoch); + ll_ioepoch_open(lli, body->mbo_ioepoch); } LUSTRE_FPRIVATE(file) = fd; @@ -543,7 +521,7 @@ int ll_file_open(struct inode *inode, struct file *file) struct obd_client_handle **och_p = NULL; __u64 *och_usecount = NULL; struct ll_file_data *fd; - int rc = 0, opendir_set = 0; + int rc = 0; CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n", PFID(ll_inode2fid(inode)), inode, file->f_flags); @@ -558,16 +536,8 @@ int ll_file_open(struct inode *inode, struct file *file) } fd->fd_file = file; - if (S_ISDIR(inode->i_mode)) { - spin_lock(&lli->lli_sa_lock); - if (!lli->lli_opendir_key && !lli->lli_sai && - lli->lli_opendir_pid == 0) { - lli->lli_opendir_key = fd; - lli->lli_opendir_pid = current_pid(); - opendir_set = 1; - } - spin_unlock(&lli->lli_sa_lock); - } + if (S_ISDIR(inode->i_mode)) + ll_authorize_statahead(inode, fd); if (is_root_inode(inode)) { LUSTRE_FPRIVATE(file) = fd; @@ -615,7 +585,7 @@ restart: } else if (it->it_flags & FMODE_EXEC) { och_p = &lli->lli_mds_exec_och; och_usecount = &lli->lli_open_fd_exec_count; - } else { + } else { och_p = &lli->lli_mds_read_och; och_usecount = &lli->lli_open_fd_read_count; } @@ -652,9 +622,19 @@ restart: * result in a deadlock */ mutex_unlock(&lli->lli_och_mutex); - it->it_create_mode |= M_CHECK_STALE; + /* + * Normally called under two situations: + * 1. NFS export. + * 2. revalidate with IT_OPEN (revalidate doesn't + * execute this intent any more). + * + * Always fetch MDS_OPEN_LOCK if this is not setstripe. + * + * Always specify MDS_OPEN_BY_FID because we don't want + * to get file with different fid. + */ + it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID; rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it); - it->it_create_mode &= ~M_CHECK_STALE; if (rc) goto out_openerr; @@ -716,9 +696,10 @@ out_och_free: mutex_unlock(&lli->lli_och_mutex); out_openerr: - if (opendir_set != 0) - ll_stop_statahead(inode, lli->lli_opendir_key); - ll_file_data_put(fd); + if (lli->lli_opendir_key == fd) + ll_deauthorize_statahead(inode, fd); + if (fd) + ll_file_data_put(fd); } else { ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1); } @@ -764,7 +745,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, struct lookup_intent it = { .it_op = IT_OPEN }; struct ll_sb_info *sbi = ll_i2sbi(inode); struct md_op_data *op_data; - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; struct lustre_handle old_handle = { 0 }; struct obd_client_handle *och = NULL; int rc; @@ -831,8 +812,8 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, it.it_flags = fmode | open_flags; it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE; - rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req, - ll_md_blocking_lease_ast, + rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req, + &ll_md_blocking_lease_ast, /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise * it can be cancelled which may mislead applications that the lease is * broken; @@ -840,7 +821,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast * doesn't deal with openhandle, so normal openhandle will be leaked. */ - LDLM_FL_NO_LRU | LDLM_FL_EXCL); + LDLM_FL_NO_LRU | LDLM_FL_EXCL); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); if (rc < 0) @@ -908,7 +889,6 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, { struct ldlm_lock *lock; bool cancelled = true; - int rc; lock = ldlm_handle2lock(&och->och_lease_handle); if (lock) { @@ -926,9 +906,8 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, if (lease_broken) *lease_broken = cancelled; - rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och, - NULL); - return rc; + return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, + inode, och, NULL); } /* Fills the obdo with the attributes for the lsm */ @@ -1138,10 +1117,11 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, { struct ll_inode_info *lli = ll_i2info(file_inode(file)); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct range_lock range; struct cl_io *io; ssize_t result; - CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zd\n", + CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n", file, iot, *ppos, count); restart: @@ -1150,41 +1130,45 @@ restart: if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { struct vvp_io *vio = vvp_env_io(env); - int write_mutex_locked = 0; + bool range_locked = false; + + if (file->f_flags & O_APPEND) + range_lock_init(&range, 0, LUSTRE_EOF); + else + range_lock_init(&range, *ppos, *ppos + count - 1); vio->vui_fd = LUSTRE_FPRIVATE(file); - vio->vui_io_subtype = args->via_io_subtype; - - switch (vio->vui_io_subtype) { - case IO_NORMAL: - vio->vui_iter = args->u.normal.via_iter; - vio->vui_iocb = args->u.normal.via_iocb; - if ((iot == CIT_WRITE) && - !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - if (mutex_lock_interruptible(&lli-> - lli_write_mutex)) { - result = -ERESTARTSYS; - goto out; - } - write_mutex_locked = 1; - } - down_read(&lli->lli_trunc_sem); - break; - case IO_SPLICE: - vio->u.splice.vui_pipe = args->u.splice.via_pipe; - vio->u.splice.vui_flags = args->u.splice.via_flags; - break; - default: - CERROR("Unknown IO type - %u\n", vio->vui_io_subtype); - LBUG(); + vio->vui_iter = args->u.normal.via_iter; + vio->vui_iocb = args->u.normal.via_iocb; + /* + * Direct IO reads must also take range lock, + * or multiple reads will try to work on the same pages + * See LU-6227 for details. + */ + if (((iot == CIT_WRITE) || + (iot == CIT_READ && (file->f_flags & O_DIRECT))) && + !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n", + range.rl_node.in_extent.start, + range.rl_node.in_extent.end); + result = range_lock(&lli->lli_write_tree, + &range); + if (result < 0) + goto out; + + range_locked = true; } + down_read(&lli->lli_trunc_sem); ll_cl_add(file, env, io); result = cl_io_loop(env, io); ll_cl_remove(file, env); - if (args->via_io_subtype == IO_NORMAL) - up_read(&lli->lli_trunc_sem); - if (write_mutex_locked) - mutex_unlock(&lli->lli_write_mutex); + up_read(&lli->lli_trunc_sem); + if (range_locked) { + CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n", + range.rl_node.in_extent.start, + range.rl_node.in_extent.end); + range_unlock(&lli->lli_write_tree, &range); + } } else { /* cl_io_rw_init() handled IO */ result = io->ci_result; @@ -1201,7 +1185,7 @@ out: * short read/write instead of restart io. */ if ((result == 0 || result == -ENODATA) && io->ci_need_restart) { - CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zd\n", + CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zu\n", iot == CIT_READ ? "read" : "write", file, *ppos, count); LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob); @@ -1237,7 +1221,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (IS_ERR(env)) return PTR_ERR(env); - args = ll_env_args(env, IO_NORMAL); + args = ll_env_args(env); args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; @@ -1261,7 +1245,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (IS_ERR(env)) return PTR_ERR(env); - args = ll_env_args(env, IO_NORMAL); + args = ll_env_args(env); args->u.normal.via_iter = from; args->u.normal.via_iocb = iocb; @@ -1271,119 +1255,15 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) return result; } -/* - * Send file content (through pagecache) somewhere with helper - */ -static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t count, - unsigned int flags) -{ - struct lu_env *env; - struct vvp_io_args *args; - ssize_t result; - int refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - args = ll_env_args(env, IO_SPLICE); - args->u.splice.via_pipe = pipe; - args->u.splice.via_flags = flags; - - result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count); - cl_env_put(env, &refcheck); - return result; -} - -static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx) -{ - struct obd_export *exp = ll_i2dtexp(inode); - struct obd_trans_info oti = { 0 }; - struct obdo *oa = NULL; - int lsm_size; - int rc = 0; - struct lov_stripe_md *lsm = NULL, *lsm2; - - oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!oa) - return -ENOMEM; - - lsm = ccc_inode_lsm_get(inode); - if (!lsm_has_objects(lsm)) { - rc = -ENOENT; - goto out; - } - - lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) * - (lsm->lsm_stripe_count)); - - lsm2 = libcfs_kvzalloc(lsm_size, GFP_NOFS); - if (!lsm2) { - rc = -ENOMEM; - goto out; - } - - oa->o_oi = *oi; - oa->o_nlink = ost_idx; - oa->o_flags |= OBD_FL_RECREATE_OBJS; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP; - obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); - memcpy(lsm2, lsm, lsm_size); - ll_inode_size_lock(inode); - rc = obd_create(NULL, exp, oa, &lsm2, &oti); - ll_inode_size_unlock(inode); - - kvfree(lsm2); - goto out; -out: - ccc_inode_lsm_put(inode, lsm); - kmem_cache_free(obdo_cachep, oa); - return rc; -} - -static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg) -{ - struct ll_recreate_obj ucreat; - struct ost_id oi; - - if (!capable(CFS_CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&ucreat, (struct ll_recreate_obj __user *)arg, - sizeof(ucreat))) - return -EFAULT; - - ostid_set_seq_mdt0(&oi); - ostid_set_id(&oi, ucreat.lrc_id); - return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx); -} - -static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg) -{ - struct lu_fid fid; - struct ost_id oi; - u32 ost_idx; - - if (!capable(CFS_CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&fid, (struct lu_fid __user *)arg, sizeof(fid))) - return -EFAULT; - - fid_to_ostid(&fid, &oi); - ost_idx = (fid_seq(&fid) >> 16) & 0xffff; - return ll_lov_recreate(inode, &oi, ost_idx); -} - int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, __u64 flags, struct lov_user_md *lum, int lum_size) { struct lov_stripe_md *lsm = NULL; - struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; + struct lookup_intent oit = { + .it_op = IT_OPEN, + .it_flags = flags | MDS_OPEN_BY_FID, + }; int rc = 0; lsm = ccc_inode_lsm_get(inode); @@ -1397,11 +1277,11 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, ll_inode_size_lock(inode); rc = ll_intent_file_open(dentry, lum, lum_size, &oit); - if (rc) + if (rc < 0) goto out_unlock; rc = oit.it_status; if (rc < 0) - goto out_req_free; + goto out_unlock; ll_release_openhandle(inode, &oit); @@ -1411,9 +1291,6 @@ out_unlock: ccc_inode_lsm_put(inode, lsm); out: return rc; -out_req_free: - ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request); - goto out; } int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, @@ -1448,9 +1325,9 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - lmmsize = body->eadatasize; + lmmsize = body->mbo_eadatasize; - if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) || + if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) || lmmsize == 0) { rc = -ENODATA; goto out; @@ -1481,13 +1358,13 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, */ if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) { lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); - if (S_ISREG(body->mode)) + if (S_ISREG(body->mbo_mode)) lustre_swab_lov_user_md_objects( ((struct lov_user_md_v1 *)lmm)->lmm_objects, stripe_count); } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) { lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); - if (S_ISREG(body->mode)) + if (S_ISREG(body->mbo_mode)) lustre_swab_lov_user_md_objects( ((struct lov_user_md_v3 *)lmm)->lmm_objects, stripe_count); @@ -1530,55 +1407,48 @@ static int ll_lov_setea(struct inode *inode, struct file *file, return rc; } +static int ll_file_getstripe(struct inode *inode, + struct lov_user_md __user *lum) +{ + struct lu_env *env; + int refcheck; + int rc; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum); + cl_env_put(env, &refcheck); + return rc; +} + static int ll_lov_setstripe(struct inode *inode, struct file *file, unsigned long arg) { - struct lov_user_md_v3 lumv3; - struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3; - struct lov_user_md_v1 __user *lumv1p = (void __user *)arg; - struct lov_user_md_v3 __user *lumv3p = (void __user *)arg; + struct lov_user_md __user *lum = (struct lov_user_md __user *)arg; + struct lov_user_md *klum; int lum_size, rc; __u64 flags = FMODE_WRITE; - /* first try with v1 which is smaller than v3 */ - lum_size = sizeof(struct lov_user_md_v1); - if (copy_from_user(lumv1, lumv1p, lum_size)) - return -EFAULT; - - if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) { - lum_size = sizeof(struct lov_user_md_v3); - if (copy_from_user(&lumv3, lumv3p, lum_size)) - return -EFAULT; - } + rc = ll_copy_user_md(lum, &klum); + if (rc < 0) + return rc; - rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lumv1, + lum_size = rc; + rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum, lum_size); cl_lov_delay_create_clear(&file->f_flags); if (rc == 0) { - struct lov_stripe_md *lsm; __u32 gen; - put_user(0, &lumv1p->lmm_stripe_count); + put_user(0, &lum->lmm_stripe_count); ll_layout_refresh(inode, &gen); - lsm = ccc_inode_lsm_get(inode); - rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), - 0, lsm, (void __user *)arg); - ccc_inode_lsm_put(inode, lsm); + rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg); } - return rc; -} -static int ll_lov_getstripe(struct inode *inode, unsigned long arg) -{ - struct lov_stripe_md *lsm; - int rc = -ENODATA; - - lsm = ccc_inode_lsm_get(inode); - if (lsm) - rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, - lsm, (void __user *)arg); - ccc_inode_lsm_put(inode, lsm); + kfree(klum); return rc; } @@ -2247,6 +2117,12 @@ free_hss: return rc; } +static inline long ll_lease_type_from_fmode(fmode_t fmode) +{ + return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) | + ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0); +} + static long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2314,11 +2190,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return rc; } case LL_IOC_LOV_GETSTRIPE: - return ll_lov_getstripe(inode, arg); - case LL_IOC_RECREATE_OBJ: - return ll_lov_recreate_obj(inode, arg); - case LL_IOC_RECREATE_FID: - return ll_lov_recreate_fid(inode, arg); + return ll_file_getstripe(inode, + (struct lov_user_md __user *)arg); case FSFILT_IOC_FIEMAP: return ll_ioctl_fiemap(inode, arg); case FSFILT_IOC_GETFLAGS: @@ -2349,6 +2222,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } + case LL_IOC_GETPARENT: + return ll_getparent(file, (struct getparent __user *)arg); case OBD_IOC_FID2PATH: return ll_fid2path(inode, (void __user *)arg); case LL_IOC_DATA_VERSION: { @@ -2451,20 +2326,20 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct ll_inode_info *lli = ll_i2info(inode); struct obd_client_handle *och = NULL; bool lease_broken; - fmode_t mode = 0; + fmode_t fmode; switch (arg) { - case F_WRLCK: + case LL_LEASE_WRLCK: if (!(file->f_mode & FMODE_WRITE)) return -EPERM; - mode = FMODE_WRITE; + fmode = FMODE_WRITE; break; - case F_RDLCK: + case LL_LEASE_RDLCK: if (!(file->f_mode & FMODE_READ)) return -EPERM; - mode = FMODE_READ; + fmode = FMODE_READ; break; - case F_UNLCK: + case LL_LEASE_UNLCK: mutex_lock(&lli->lli_och_mutex); if (fd->fd_lease_och) { och = fd->fd_lease_och; @@ -2472,26 +2347,26 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } mutex_unlock(&lli->lli_och_mutex); - if (och) { - mode = och->och_flags & - (FMODE_READ|FMODE_WRITE); - rc = ll_lease_close(och, inode, &lease_broken); - if (rc == 0 && lease_broken) - mode = 0; - } else { - rc = -ENOLCK; - } + if (!och) + return -ENOLCK; + + fmode = och->och_flags; + rc = ll_lease_close(och, inode, &lease_broken); + if (rc < 0) + return rc; + + if (lease_broken) + fmode = 0; - /* return the type of lease or error */ - return rc < 0 ? rc : (int)mode; + return ll_lease_type_from_fmode(fmode); default: return -EINVAL; } - CDEBUG(D_INODE, "Set lease with mode %d\n", mode); + CDEBUG(D_INODE, "Set lease with mode %u\n", fmode); /* apply for lease */ - och = ll_lease_open(inode, file, mode, 0); + och = ll_lease_open(inode, file, fmode, 0); if (IS_ERR(och)) return PTR_ERR(och); @@ -2512,8 +2387,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case LL_IOC_GET_LEASE: { struct ll_inode_info *lli = ll_i2info(inode); struct ldlm_lock *lock = NULL; + fmode_t fmode = 0; - rc = 0; mutex_lock(&lli->lli_och_mutex); if (fd->fd_lease_och) { struct obd_client_handle *och = fd->fd_lease_och; @@ -2522,14 +2397,13 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (lock) { lock_res_and_lock(lock); if (!ldlm_is_cancel(lock)) - rc = och->och_flags & - (FMODE_READ | FMODE_WRITE); + fmode = och->och_flags; unlock_res_and_lock(lock); LDLM_LOCK_PUT(lock); } } mutex_unlock(&lli->lli_och_mutex); - return rc; + return ll_lease_type_from_fmode(fmode); } case LL_IOC_HSM_IMPORT: { struct hsm_user_import *hui; @@ -2574,9 +2448,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) eof = i_size_read(inode); } - retval = generic_file_llseek_size(file, offset, origin, - ll_file_maxbytes(inode), eof); - return retval; + return generic_file_llseek_size(file, offset, origin, + ll_file_maxbytes(inode), eof); } static int ll_flush(struct file *file, fl_owner_t id) @@ -2593,9 +2466,11 @@ static int ll_flush(struct file *file, fl_owner_t id) */ rc = lli->lli_async_rc; lli->lli_async_rc = 0; - err = lov_read_and_clear_async_rc(lli->lli_clob); - if (rc == 0) - rc = err; + if (lli->lli_clob) { + err = lov_read_and_clear_async_rc(lli->lli_clob); + if (!rc) + rc = err; + } /* The application has been told about write failure already. * Do not report failure again. @@ -2714,6 +2589,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) struct md_op_data *op_data; struct lustre_handle lockh = {0}; ldlm_policy_data_t flock = { {0} }; + int fl_type = file_lock->fl_type; __u64 flags = 0; int rc; int rc2 = 0; @@ -2744,7 +2620,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner) flock.l_flock.owner = (unsigned long)file_lock->fl_pid; - switch (file_lock->fl_type) { + switch (fl_type) { case F_RDLCK: einfo.ei_mode = LCK_PR; break; @@ -2764,8 +2640,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) einfo.ei_mode = LCK_PW; break; default: - CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", - file_lock->fl_type); + CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type); return -ENOTSUPP; } @@ -2787,16 +2662,18 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) case F_GETLK64: #endif flags = LDLM_FL_TEST_LOCK; - /* Save the old mode so that if the mode in the lock changes we - * can decrement the appropriate reader or writer refcount. - */ - file_lock->fl_type = einfo.ei_mode; break; default: CERROR("unknown fcntl lock command: %d\n", cmd); return -EINVAL; } + /* + * Save the old mode so that if the mode in the lock changes we + * can decrement the appropriate reader or writer refcount. + */ + file_lock->fl_type = einfo.ei_mode; + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) @@ -2806,8 +2683,12 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); - rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, - op_data, &lockh, &flock, 0, NULL /* req */, flags); + rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh, + flags); + + /* Restore the file lock type if not TEST lock. */ + if (!(flags & LDLM_FL_TEST_LOCK)) + file_lock->fl_type = fl_type; if ((rc == 0 || file_lock->fl_type == F_UNLCK) && !(flags & LDLM_FL_TEST_LOCK)) @@ -2815,8 +2696,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (rc2 && file_lock->fl_type != F_UNLCK) { einfo.ei_mode = LCK_NL; - md_enqueue(sbi->ll_md_exp, &einfo, NULL, - op_data, &lockh, &flock, 0, NULL /* req */, flags); + md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, + &lockh, flags); rc = rc2; } @@ -2825,6 +2706,117 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) return rc; } +int ll_get_fid_by_name(struct inode *parent, const char *name, + int namelen, struct lu_fid *fid) +{ + struct md_op_data *op_data = NULL; + struct ptlrpc_request *req; + struct mdt_body *body; + int rc; + + op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + return PTR_ERR(op_data); + + op_data->op_valid = OBD_MD_FLID; + rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req); + ll_finish_md_op_data(op_data); + if (rc < 0) + return rc; + + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + if (!body) { + rc = -EFAULT; + goto out_req; + } + if (fid) + *fid = body->mbo_fid1; +out_req: + ptlrpc_req_finished(req); + return rc; +} + +int ll_migrate(struct inode *parent, struct file *file, int mdtidx, + const char *name, int namelen) +{ + struct ptlrpc_request *request = NULL; + struct inode *child_inode = NULL; + struct dentry *dchild = NULL; + struct md_op_data *op_data; + struct qstr qstr; + int rc; + + CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%d\n", + name, PFID(ll_inode2fid(parent)), mdtidx); + + op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, + 0, LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + return PTR_ERR(op_data); + + /* Get child FID first */ + qstr.hash = full_name_hash(parent, name, namelen); + qstr.name = name; + qstr.len = namelen; + dchild = d_lookup(file_dentry(file), &qstr); + if (dchild) { + op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); + if (dchild->d_inode) { + child_inode = igrab(dchild->d_inode); + if (child_inode) { + inode_lock(child_inode); + op_data->op_fid3 = *ll_inode2fid(child_inode); + ll_invalidate_aliases(child_inode); + } + } + dput(dchild); + } else { + rc = ll_get_fid_by_name(parent, name, namelen, + &op_data->op_fid3); + if (rc) + goto out_free; + } + + if (!fid_is_sane(&op_data->op_fid3)) { + CERROR("%s: migrate %s, but fid "DFID" is insane\n", + ll_get_fsname(parent->i_sb, NULL, 0), name, + PFID(&op_data->op_fid3)); + rc = -EINVAL; + goto out_free; + } + + rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3); + if (rc < 0) + goto out_free; + + if (rc == mdtidx) { + CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name, + PFID(&op_data->op_fid3), mdtidx); + rc = 0; + goto out_free; + } + + op_data->op_mds = mdtidx; + op_data->op_cli_flags = CLI_MIGRATE; + rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name, + namelen, name, namelen, &request); + if (!rc) + ll_update_times(request, parent); + + ptlrpc_req_finished(request); + +out_free: + if (child_inode) { + clear_nlink(child_inode); + inode_unlock(child_inode); + iput(child_inode); + } + + ll_finish_md_op_data(op_data); + return rc; +} + static int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock) { @@ -2847,7 +2839,7 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits, struct lustre_handle lockh; ldlm_policy_data_t policy; enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ? - (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode; + (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode; struct lu_fid *fid; __u64 flags; int i; @@ -2888,15 +2880,12 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits, { ldlm_policy_data_t policy = { .l_inodebits = {bits} }; struct lu_fid *fid; - enum ldlm_mode rc; fid = &ll_i2info(inode)->lli_fid; CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); - rc = md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED, - fid, LDLM_IBITS, &policy, mode, lockh); - - return rc; + return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED, + fid, LDLM_IBITS, &policy, mode, lockh); } static int ll_inode_revalidate_fini(struct inode *inode, int rc) @@ -2949,15 +2938,9 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) if (IS_ERR(op_data)) return PTR_ERR(op_data); - oit.it_create_mode |= M_CHECK_STALE; - rc = md_intent_lock(exp, op_data, NULL, 0, - /* we are not interested in name - * based lookup - */ - &oit, 0, &req, - ll_md_blocking_ast, 0); + rc = md_intent_lock(exp, op_data, &oit, &req, + &ll_md_blocking_ast, 0); ll_finish_md_op_data(op_data); - oit.it_create_mode &= ~M_CHECK_STALE; if (rc < 0) { rc = ll_inode_revalidate_fini(inode, rc); goto out; @@ -3003,10 +2986,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) op_data->op_valid = valid; rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); - if (rc) { - rc = ll_inode_revalidate_fini(inode, rc); - return rc; - } + if (rc) + return ll_inode_revalidate_fini(inode, rc); rc = ll_prep_inode(&inode, req, NULL, NULL); } @@ -3015,6 +2996,28 @@ out: return rc; } +static int ll_merge_md_attr(struct inode *inode) +{ + struct cl_attr attr = { 0 }; + int rc; + + LASSERT(ll_i2info(inode)->lli_lsm_md); + rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md, + &attr, ll_md_blocking_ast); + if (rc) + return rc; + + set_nlink(inode, attr.cat_nlink); + inode->i_blocks = attr.cat_blocks; + i_size_write(inode, attr.cat_size); + + ll_i2info(inode)->lli_atime = attr.cat_atime; + ll_i2info(inode)->lli_mtime = attr.cat_mtime; + ll_i2info(inode)->lli_ctime = attr.cat_ctime; + + return 0; +} + static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) { struct inode *inode = d_inode(dentry); @@ -3026,6 +3029,13 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) /* if object isn't regular file, don't validate size */ if (!S_ISREG(inode->i_mode)) { + if (S_ISDIR(inode->i_mode) && + ll_i2info(inode)->lli_lsm_md) { + rc = ll_merge_md_attr(inode); + if (rc) + return rc; + } + LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime; LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime; LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime; @@ -3057,13 +3067,14 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) if (res) return res; + OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30); + stat->dev = inode->i_sb->s_dev; if (ll_need_32bit_api(sbi)) stat->ino = cl_fid_build_ino(&lli->lli_fid, 1); else stat->ino = inode->i_ino; stat->mode = inode->i_mode; - stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; stat->gid = inode->i_gid; stat->rdev = inode->i_rdev; @@ -3072,6 +3083,7 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) stat->ctime = inode->i_ctime; stat->blksize = 1 << inode->i_blkbits; + stat->nlink = inode->i_nlink; stat->size = i_size_read(inode); stat->blocks = inode->i_blocks; @@ -3139,6 +3151,12 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type) int ll_inode_permission(struct inode *inode, int mask) { + struct ll_sb_info *sbi; + struct root_squash_info *squash; + const struct cred *old_cred = NULL; + struct cred *cred = NULL; + bool squash_id = false; + cfs_cap_t cap; int rc = 0; if (mask & MAY_NOT_BLOCK) @@ -3158,9 +3176,46 @@ int ll_inode_permission(struct inode *inode, int mask) CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n", PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask); + /* squash fsuid/fsgid if needed */ + sbi = ll_i2sbi(inode); + squash = &sbi->ll_squash; + if (unlikely(squash->rsi_uid && + uid_eq(current_fsuid(), GLOBAL_ROOT_UID) && + !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) { + squash_id = true; + } + + if (squash_id) { + CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n", + __kuid_val(current_fsuid()), __kgid_val(current_fsgid()), + squash->rsi_uid, squash->rsi_gid); + + /* + * update current process's credentials + * and FS capability + */ + cred = prepare_creds(); + if (!cred) + return -ENOMEM; + + cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid); + cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid); + for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) { + if ((1 << cap) & CFS_CAP_FS_MASK) + cap_lower(cred->cap_effective, cap); + } + old_cred = override_creds(cred); + } + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1); rc = generic_permission(inode, mask); + /* restore current process's credentials and FS capability */ + if (squash_id) { + revert_creds(old_cred); + put_cred(cred); + } + return rc; } @@ -3173,7 +3228,7 @@ struct file_operations ll_file_operations = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, - .splice_read = ll_file_splice_read, + .splice_read = generic_file_splice_read, .fsync = ll_fsync, .flush = ll_flush }; @@ -3186,7 +3241,7 @@ struct file_operations ll_file_operations_flock = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, - .splice_read = ll_file_splice_read, + .splice_read = generic_file_splice_read, .fsync = ll_fsync, .flush = ll_flush, .flock = ll_file_flock, @@ -3202,7 +3257,7 @@ struct file_operations ll_file_operations_noflock = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, - .splice_read = ll_file_splice_read, + .splice_read = generic_file_splice_read, .fsync = ll_fsync, .flush = ll_flush, .flock = ll_file_noflock, @@ -3213,10 +3268,7 @@ const struct inode_operations ll_file_inode_operations = { .setattr = ll_setattr, .getattr = ll_getattr, .permission = ll_inode_permission, - .setxattr = ll_setxattr, - .getxattr = ll_getxattr, .listxattr = ll_listxattr, - .removexattr = ll_removexattr, .fiemap = ll_fiemap, .get_acl = ll_get_acl, }; @@ -3251,7 +3303,6 @@ void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd) if (!in_data) return NULL; - memset(in_data, 0, sizeof(*in_data)); in_data->iocd_size = size; in_data->iocd_cb = cb; in_data->iocd_count = count; @@ -3389,7 +3440,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) goto out; } - lmmsize = body->eadatasize; + lmmsize = body->mbo_eadatasize; if (lmmsize == 0) /* empty layout */ { rc = 0; goto out; @@ -3447,7 +3498,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, PFID(&lli->lli_fid), inode, reconf); /* in case this is a caching lock and reinstate with new inode */ - md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL); + md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL); lock_res_and_lock(lock); lvb_ready = ldlm_is_lvb_ready(lock); @@ -3557,8 +3608,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS, .ei_mode = LCK_CR, - .ei_cb_bl = ll_md_blocking_ast, - .ei_cb_cp = ldlm_completion_ast, + .ei_cb_bl = &ll_md_blocking_ast, + .ei_cb_cp = &ldlm_completion_ast, }; int rc; @@ -3604,8 +3655,7 @@ again: ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode); - rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh, - NULL, 0, NULL, 0); + rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0); ptlrpc_req_finished(it.it_request); it.it_request = NULL;