4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Map POSIX open() access-mode flags to NT-style desired-access bits
 * for an SMB open request.
 * NOTE(review): listing is elided — the return statements for the
 * O_RDONLY and O_WRONLY branches are not visible here; confirm against
 * the full source.
 */
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
51 else if ((flags & O_ACCMODE) == O_WRONLY)
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
/* Fallback: request a minimal explicit rights mask instead of a
 * GENERIC_* shorthand. */
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate VFS open flags to SMB_O_* flags for a POSIX-extensions
 * open/create request.
 * NOTE(review): listing is elided — the conditions guarding SMB_O_EXCL,
 * SMB_O_TRUNC, SMB_O_SYNC and SMB_O_DIRECT are partially missing from
 * this view; verify against the full source.
 */
65 static u32 cifs_posix_convert_flags(unsigned int flags)
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
79 posix_flags |= SMB_O_EXCL;
/* O_EXCL without O_CREAT has no defined meaning for open; warn once
 * via debug log and ignore it rather than failing the open. */
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
94 posix_flags |= SMB_O_DIRECT;
/*
 * Map POSIX create/truncate flag combinations to an NT create
 * disposition, checked most-specific first (see the mapping table in
 * cifs_nt_open below).
 * NOTE(review): the FILE_CREATE / FILE_OPEN_IF return lines and the
 * final FILE_OPEN default are elided from this listing.
 */
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
/*
 * Open a file via the SMB POSIX-extensions create call
 * (CIFSPOSIXCreate) and, on success, populate or refresh the inode
 * from the returned FILE_UNIX_BASIC_INFO.
 * @full_path: server-relative path of the file
 * @pinode: in/out inode pointer; a new inode is fetched when *pinode
 *          is NULL, otherwise the existing inode's attrs are updated
 * @poplock/@pnetfid: out parameters filled by the create call
 * Returns 0 on success or a negative errno.
 * NOTE(review): error-handling lines (tlink IS_ERR check, rc checks,
 * kfree/return paths) are elided from this listing.
 */
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
131 tlink = cifs_sb_tlink(cifs_sb);
137 tcon = tlink_tcon(tlink);
/* Apply the process umask before sending the create mode. */
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_remap(cifs_sb));
144 cifs_put_tlink(tlink);
/* Type == -1 means the server did not return file info with the
 * open; the caller must do a separate qpathinfo. */
149 if (presp_data->Type == cpu_to_le32(-1))
150 goto posix_open_ret; /* open ok, caller does qpathinfo */
153 goto posix_open_ret; /* caller does not need info */
155 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157 /* get new inode and set it up */
158 if (*pinode == NULL) {
159 cifs_fill_uniqueid(sb, &fattr);
160 *pinode = cifs_iget(sb, &fattr);
166 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open a file using the protocol-generic server->ops->open path
 * (NT-style create), then refresh inode info from the returned buffer.
 * Used when POSIX-extensions open is unavailable or failed.
 * NOTE(review): listing is elided — variable declarations (rc, buf,
 * desired_access, disposition), allocation-failure checks, oparms.fid
 * assignment and the cleanup path are not visible here.
 */
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177 struct cifs_fid *fid, unsigned int xid)
182 int create_options = CREATE_NOT_DIR;
184 struct TCP_Server_Info *server = tcon->ses->server;
185 struct cifs_open_parms oparms;
/* Server lacking an open op cannot service this request at all. */
187 if (!server->ops->open)
190 desired_access = cifs_convert_flags(f_flags);
192 /*********************************************************************
193 * open flag mapping table:
195 * POSIX Flag CIFS Disposition
196 * ---------- ----------------
197 * O_CREAT FILE_OPEN_IF
198 * O_CREAT | O_EXCL FILE_CREATE
199 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
200 * O_TRUNC FILE_OVERWRITE
201 * none of the above FILE_OPEN
203 * Note that there is not a direct match between disposition
204 * FILE_SUPERSEDE (ie create whether or not file exists although
205 * O_CREAT | O_TRUNC is similar but truncates the existing
206 * file rather than creating a new file as FILE_SUPERSEDE does
207 * (which uses the attributes / metadata passed in on open call)
209 *? O_SYNC is a reasonable match to CIFS writethrough flag
210 *? and the read write flags match reasonably. O_LARGEFILE
211 *? is irrelevant because largefile support is always used
212 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214 *********************************************************************/
216 disposition = cifs_get_disposition(f_flags);
218 /* BB pass O_SYNC flag through on file attributes .. BB */
220 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
/* Mounts with backup credentials request backup-intent access so
 * ACL-restricted files can still be opened by backup tools. */
224 if (backup_cred(cifs_sb))
225 create_options |= CREATE_OPEN_BACKUP_INTENT;
228 oparms.cifs_sb = cifs_sb;
229 oparms.desired_access = desired_access;
230 oparms.create_options = create_options;
231 oparms.disposition = disposition;
232 oparms.path = full_path;
234 oparms.reconnect = false;
236 rc = server->ops->open(xid, &oparms, oplock, buf);
/* Refresh inode metadata: unix-extensions path vs. generic path. */
242 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
245 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Return true if any open fid on this inode holds mandatory
 * byte-range locks. Walks cinode->llist under lock_sem (read).
 * NOTE(review): the has_locks assignment and loop break are elided
 * from this listing.
 */
254 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
256 struct cifs_fid_locks *cur;
257 bool has_locks = false;
259 down_read(&cinode->lock_sem);
260 list_for_each_entry(cur, &cinode->llist, llist) {
261 if (!list_empty(&cur->locks)) {
266 up_read(&cinode->lock_sem);
/*
 * Allocate and initialize a cifsFileInfo for a freshly opened file,
 * link it into the per-tcon and per-inode open-file lists, resolve the
 * effective oplock level (including any pending lease break recorded
 * in fid->pending_open), and attach it as file->private_data.
 * NOTE(review): allocation-failure paths and the kzalloc-failure
 * cleanup are elided from this listing.
 */
270 struct cifsFileInfo *
271 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
272 struct tcon_link *tlink, __u32 oplock)
274 struct dentry *dentry = file_dentry(file);
275 struct inode *inode = d_inode(dentry);
276 struct cifsInodeInfo *cinode = CIFS_I(inode);
277 struct cifsFileInfo *cfile;
278 struct cifs_fid_locks *fdlocks;
279 struct cifs_tcon *tcon = tlink_tcon(tlink);
280 struct TCP_Server_Info *server = tcon->ses->server;
282 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
286 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
/* Register this fid's (initially empty) lock list on the inode. */
292 INIT_LIST_HEAD(&fdlocks->locks);
293 fdlocks->cfile = cfile;
294 cfile->llist = fdlocks;
295 down_write(&cinode->lock_sem);
296 list_add(&fdlocks->llist, &cinode->llist);
297 up_write(&cinode->lock_sem);
300 cfile->pid = current->tgid;
301 cfile->uid = current_fsuid();
302 cfile->dentry = dget(dentry);
303 cfile->f_flags = file->f_flags;
304 cfile->invalidHandle = false;
305 cfile->tlink = cifs_get_tlink(tlink);
306 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
307 mutex_init(&cfile->fh_mutex);
308 spin_lock_init(&cfile->file_info_lock);
/* Pin the superblock active while this file handle exists. */
310 cifs_sb_active(inode->i_sb);
313 * If the server returned a read oplock and we have mandatory brlocks,
314 * set oplock level to None.
316 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
317 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
/* A lease break that raced with this open is recorded in
 * pending_open; prefer that oplock value if set. */
321 spin_lock(&tcon->open_file_lock);
322 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
323 oplock = fid->pending_open->oplock;
324 list_del(&fid->pending_open->olist);
326 fid->purge_cache = false;
327 server->ops->set_fid(cfile, fid, oplock);
329 list_add(&cfile->tlist, &tcon->openFileList);
331 /* if readable file instance put first in list*/
332 if (file->f_mode & FMODE_READ)
333 list_add(&cfile->flist, &cinode->openFileList);
335 list_add_tail(&cfile->flist, &cinode->openFileList);
336 spin_unlock(&tcon->open_file_lock);
338 if (fid->purge_cache)
339 cifs_zap_mapping(inode);
341 file->private_data = cfile;
/*
 * Take a reference on a cifsFileInfo under its file_info_lock and
 * return it (return statement elided from this listing).
 */
345 struct cifsFileInfo *
346 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
348 spin_lock(&cifs_file->file_info_lock);
349 cifsFileInfo_get_locked(cifs_file);
350 spin_unlock(&cifs_file->file_info_lock);
355 * Release a reference on the file private data. This may involve closing
356 * the filehandle out on the server. Must be called without holding
357 * tcon->open_file_lock and cifs_file->file_info_lock.
359 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
361 struct inode *inode = d_inode(cifs_file->dentry);
362 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
363 struct TCP_Server_Info *server = tcon->ses->server;
364 struct cifsInodeInfo *cifsi = CIFS_I(inode);
365 struct super_block *sb = inode->i_sb;
366 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
367 struct cifsLockInfo *li, *tmp;
369 struct cifs_pending_open open;
370 bool oplock_break_cancelled;
/* Drop a reference; if others remain we are done. */
372 spin_lock(&tcon->open_file_lock);
374 spin_lock(&cifs_file->file_info_lock);
375 if (--cifs_file->count > 0) {
376 spin_unlock(&cifs_file->file_info_lock);
377 spin_unlock(&tcon->open_file_lock);
380 spin_unlock(&cifs_file->file_info_lock);
382 if (server->ops->get_lease_key)
383 server->ops->get_lease_key(inode, &fid);
385 /* store open in pending opens to make sure we don't miss lease break */
386 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
388 /* remove it from the lists */
389 list_del(&cifs_file->flist);
390 list_del(&cifs_file->tlist);
392 if (list_empty(&cifsi->openFileList)) {
393 cifs_dbg(FYI, "closing last open instance for inode %p\n",
394 d_inode(cifs_file->dentry));
396 * In strict cache mode we need invalidate mapping on the last
397 * close because it may cause a error when we open this file
398 * again and get at least level II oplock.
400 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
401 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
402 cifs_set_oplock_level(cifsi, 0);
405 spin_unlock(&tcon->open_file_lock);
/* Cancel any queued oplock-break work before closing the handle. */
407 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
409 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
410 struct TCP_Server_Info *server = tcon->ses->server;
414 if (server->ops->close)
415 server->ops->close(xid, tcon, &cifs_file->fid);
419 if (oplock_break_cancelled)
420 cifs_done_oplock_break(cifsi);
422 cifs_del_pending_open(&open);
425 * Delete any outstanding lock records. We'll lose them when the file
428 down_write(&cifsi->lock_sem);
429 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
430 list_del(&li->llist);
431 cifs_del_lock_waiters(li);
434 list_del(&cifs_file->llist->llist);
435 kfree(cifs_file->llist);
436 up_write(&cifsi->lock_sem);
/* Drop the references taken in cifs_new_fileinfo. */
438 cifs_put_tlink(cifs_file->tlink);
439 dput(cifs_file->dentry);
440 cifs_sb_deactive(sb);
/*
 * VFS ->open handler: try POSIX-extensions open first (when the tcon
 * supports it and it is not marked broken), otherwise fall back to the
 * NT-style open via cifs_nt_open(). On success, builds the
 * cifsFileInfo and sets up fscache; may also push file mode to the
 * server for newly created files on unix-extension mounts.
 * NOTE(review): several declarations (rc, xid, oplock, fid), error
 * labels and the final cleanup are elided from this listing.
 */
444 int cifs_open(struct inode *inode, struct file *file)
450 struct cifs_sb_info *cifs_sb;
451 struct TCP_Server_Info *server;
452 struct cifs_tcon *tcon;
453 struct tcon_link *tlink;
454 struct cifsFileInfo *cfile = NULL;
455 char *full_path = NULL;
456 bool posix_open_ok = false;
458 struct cifs_pending_open open;
462 cifs_sb = CIFS_SB(inode->i_sb);
463 tlink = cifs_sb_tlink(cifs_sb);
466 return PTR_ERR(tlink);
468 tcon = tlink_tcon(tlink);
469 server = tcon->ses->server;
471 full_path = build_path_from_dentry(file_dentry(file));
472 if (full_path == NULL) {
477 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
478 inode, file->f_flags, full_path);
/* O_DIRECT on a strict-cache mount switches to the uncached file ops
 * (with or without byte-range-lock support per mount flags). */
480 if (file->f_flags & O_DIRECT &&
481 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
482 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
483 file->f_op = &cifs_file_direct_nobrl_ops;
485 file->f_op = &cifs_file_direct_ops;
493 if (!tcon->broken_posix_open && tcon->unix_ext &&
494 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
495 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
496 /* can not refresh inode info since size could be stale */
497 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
498 cifs_sb->mnt_file_mode /* ignored */,
499 file->f_flags, &oplock, &fid.netfid, xid);
501 cifs_dbg(FYI, "posix open succeeded\n");
502 posix_open_ok = true;
503 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
/* Server claims POSIX path ops but rejected the open: disable
 * posix open for this tcon from now on. */
504 if (tcon->ses->serverNOS)
505 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
506 tcon->ses->serverName,
507 tcon->ses->serverNOS);
508 tcon->broken_posix_open = true;
509 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
510 (rc != -EOPNOTSUPP)) /* path not found or net err */
513 * Else fallthrough to retry open the old way on network i/o
518 if (server->ops->get_lease_key)
519 server->ops->get_lease_key(inode, &fid);
/* Record the pending open so a concurrent lease break is not lost. */
521 cifs_add_pending_open(&fid, tlink, &open);
523 if (!posix_open_ok) {
524 if (server->ops->get_lease_key)
525 server->ops->get_lease_key(inode, &fid);
527 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
528 file->f_flags, &oplock, &fid, xid);
530 cifs_del_pending_open(&open);
535 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
/* cifs_new_fileinfo failure: close the server handle we opened. */
537 if (server->ops->close)
538 server->ops->close(xid, tcon, &fid);
539 cifs_del_pending_open(&open);
544 cifs_fscache_set_inode_cookie(inode, file);
546 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
548 * Time to set mode which we can not set earlier due to
549 * problems creating new read-only files.
551 struct cifs_unix_set_info_args args = {
552 .mode = inode->i_mode,
553 .uid = INVALID_UID, /* no change */
554 .gid = INVALID_GID, /* no change */
555 .ctime = NO_CHANGE_64,
556 .atime = NO_CHANGE_64,
557 .mtime = NO_CHANGE_64,
560 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
567 cifs_put_tlink(tlink);
571 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
574 * Try to reacquire byte range locks that were released when session
575 * to server was lost.
578 cifs_relock_file(struct cifsFileInfo *cfile)
580 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
581 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
582 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
585 down_read(&cinode->lock_sem);
586 if (cinode->can_cache_brlcks) {
587 /* can cache locks - no need to relock */
588 up_read(&cinode->lock_sem);
/* Push via POSIX lock requests when unix extensions support fcntl
 * locks and the mount did not disable posix brlocks; otherwise use
 * the protocol-specific mandatory-lock path. */
592 if (cap_unix(tcon->ses) &&
593 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
594 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
595 rc = cifs_push_posix_locks(cfile);
597 rc = tcon->ses->server->ops->push_mand_locks(cfile);
599 up_read(&cinode->lock_sem);
/*
 * Re-open a file whose handle was invalidated (e.g. after reconnect).
 * Tries a POSIX-extensions reopen first where supported, then the
 * generic server->ops->open path; on success optionally flushes dirty
 * pages and refreshes inode info (@can_flush), restores the fid/oplock
 * state, and re-acquires byte-range locks when reconnecting.
 * NOTE(review): declarations (rc, xid, oplock, desired_access, inode),
 * several rc checks and goto labels are elided from this listing.
 */
604 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
609 struct cifs_sb_info *cifs_sb;
610 struct cifs_tcon *tcon;
611 struct TCP_Server_Info *server;
612 struct cifsInodeInfo *cinode;
614 char *full_path = NULL;
616 int disposition = FILE_OPEN;
617 int create_options = CREATE_NOT_DIR;
618 struct cifs_open_parms oparms;
/* fh_mutex serializes reopen attempts; another thread may have
 * already revalidated the handle. */
621 mutex_lock(&cfile->fh_mutex);
622 if (!cfile->invalidHandle) {
623 mutex_unlock(&cfile->fh_mutex);
629 inode = d_inode(cfile->dentry);
630 cifs_sb = CIFS_SB(inode->i_sb);
631 tcon = tlink_tcon(cfile->tlink);
632 server = tcon->ses->server;
635 * Can not grab rename sem here because various ops, including those
636 * that already have the rename sem can end up causing writepage to get
637 * called and if the server was down that means we end up here, and we
638 * can never tell if the caller already has the rename_sem.
640 full_path = build_path_from_dentry(cfile->dentry);
641 if (full_path == NULL) {
643 mutex_unlock(&cfile->fh_mutex);
648 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
649 inode, cfile->f_flags, full_path);
651 if (tcon->ses->server->oplocks)
656 if (tcon->unix_ext && cap_unix(tcon->ses) &&
657 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
658 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
660 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
661 * original open. Must mask them off for a reopen.
663 unsigned int oflags = cfile->f_flags &
664 ~(O_CREAT | O_EXCL | O_TRUNC);
666 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
667 cifs_sb->mnt_file_mode /* ignored */,
668 oflags, &oplock, &cfile->fid.netfid, xid);
670 cifs_dbg(FYI, "posix reopen succeeded\n");
671 oparms.reconnect = true;
675 * fallthrough to retry open the old way on errors, especially
676 * in the reconnect path it is important to retry hard
680 desired_access = cifs_convert_flags(cfile->f_flags);
682 if (backup_cred(cifs_sb))
683 create_options |= CREATE_OPEN_BACKUP_INTENT;
685 if (server->ops->get_lease_key)
686 server->ops->get_lease_key(inode, &cfile->fid);
689 oparms.cifs_sb = cifs_sb;
690 oparms.desired_access = desired_access;
691 oparms.create_options = create_options;
692 oparms.disposition = disposition;
693 oparms.path = full_path;
694 oparms.fid = &cfile->fid;
695 oparms.reconnect = true;
698 * Can not refresh inode by passing in file_info buf to be returned by
699 * ops->open and then calling get_inode_info with returned buf since
700 * file might have write behind data that needs to be flushed and server
701 * version of file size can be stale. If we knew for sure that inode was
702 * not dirty locally we could do this.
704 rc = server->ops->open(xid, &oparms, &oplock, NULL);
705 if (rc == -ENOENT && oparms.reconnect == false) {
706 /* durable handle timeout is expired - open the file again */
707 rc = server->ops->open(xid, &oparms, &oplock, NULL);
708 /* indicate that we need to relock the file */
709 oparms.reconnect = true;
713 mutex_unlock(&cfile->fh_mutex);
714 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
715 cifs_dbg(FYI, "oplock: %d\n", oplock);
716 goto reopen_error_exit;
/* Reopen succeeded: mark the handle valid again before releasing
 * fh_mutex so concurrent I/O can proceed. */
720 cfile->invalidHandle = false;
721 mutex_unlock(&cfile->fh_mutex);
722 cinode = CIFS_I(inode);
725 rc = filemap_write_and_wait(inode->i_mapping);
726 mapping_set_error(inode->i_mapping, rc);
729 rc = cifs_get_inode_info_unix(&inode, full_path,
732 rc = cifs_get_inode_info(&inode, full_path, NULL,
733 inode->i_sb, xid, NULL);
736 * Else we are writing out data to server already and could deadlock if
737 * we tried to flush data, and since we do not know if we have data that
738 * would invalidate the current end of file on the server we can not go
739 * to the server to get the new inode info.
742 server->ops->set_fid(cfile, &cfile->fid, oplock);
743 if (oparms.reconnect)
744 cifs_relock_file(cfile);
/*
 * VFS ->release handler: drop this file's reference on its
 * cifsFileInfo (which may close the server handle).
 */
752 int cifs_close(struct inode *inode, struct file *file)
754 if (file->private_data != NULL) {
755 cifsFileInfo_put(file->private_data);
756 file->private_data = NULL;
759 /* return code from the ->release op is always ignored */
/*
 * After reconnect, reopen every invalidated handle on this tree
 * connection. First collects invalid handles (with an extra reference)
 * onto a private list under open_file_lock, then reopens each one
 * outside the lock.
 * NOTE(review): the `continue` for valid handles after line 778 is
 * elided from this listing.
 */
764 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
766 struct cifsFileInfo *open_file;
767 struct list_head *tmp;
768 struct list_head *tmp1;
769 struct list_head tmp_list;
771 cifs_dbg(FYI, "Reopen persistent handles");
772 INIT_LIST_HEAD(&tmp_list);
774 /* list all files open on tree connection, reopen resilient handles */
775 spin_lock(&tcon->open_file_lock);
776 list_for_each(tmp, &tcon->openFileList) {
777 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
778 if (!open_file->invalidHandle)
780 cifsFileInfo_get(open_file);
781 list_add_tail(&open_file->rlist, &tmp_list);
783 spin_unlock(&tcon->open_file_lock);
785 list_for_each_safe(tmp, tmp1, &tmp_list) {
786 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
787 cifs_reopen_file(open_file, false /* do not flush */);
788 list_del_init(&open_file->rlist);
789 cifsFileInfo_put(open_file);
/*
 * VFS ->release handler for directories: close the search handle on
 * the server if still needed, free any cached network search buffer,
 * and release the private data.
 * NOTE(review): declarations (rc, xid, buf) and early-return for NULL
 * private_data are elided from this listing.
 */
793 int cifs_closedir(struct inode *inode, struct file *file)
797 struct cifsFileInfo *cfile = file->private_data;
798 struct cifs_tcon *tcon;
799 struct TCP_Server_Info *server;
802 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
808 tcon = tlink_tcon(cfile->tlink);
809 server = tcon->ses->server;
811 cifs_dbg(FYI, "Freeing private data in close dir\n");
812 spin_lock(&cfile->file_info_lock);
813 if (server->ops->dir_needs_close(cfile)) {
814 cfile->invalidHandle = true;
815 spin_unlock(&cfile->file_info_lock);
816 if (server->ops->close_dir)
817 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
820 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
821 /* not much we can do if it fails anyway, ignore rc */
824 spin_unlock(&cfile->file_info_lock);
/* Release the cached search-response buffer, using the size-matched
 * release routine. */
826 buf = cfile->srch_inf.ntwrk_buf_start;
828 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
829 cfile->srch_inf.ntwrk_buf_start = NULL;
830 if (cfile->srch_inf.smallBuf)
831 cifs_small_buf_release(buf);
833 cifs_buf_release(buf);
836 cifs_put_tlink(cfile->tlink);
837 kfree(file->private_data);
838 file->private_data = NULL;
839 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate and initialize a cifsLockInfo describing one byte-range
 * lock owned by the current thread group.
 * NOTE(review): the NULL check after kmalloc, the type assignment and
 * the return statement are elided from this listing.
 */
844 static struct cifsLockInfo *
845 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
847 struct cifsLockInfo *lock =
848 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
851 lock->offset = offset;
852 lock->length = length;
854 lock->pid = current->tgid;
855 INIT_LIST_HEAD(&lock->blist);
856 init_waitqueue_head(&lock->block_q);
/*
 * Wake every lock request blocked on @lock and unlink it from the
 * blocked list so waiters can re-attempt the lock.
 */
861 cifs_del_lock_waiters(struct cifsLockInfo *lock)
863 struct cifsLockInfo *li, *tmp;
864 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
865 list_del_init(&li->blist);
866 wake_up(&li->block_q);
870 #define CIFS_LOCK_OP 0
871 #define CIFS_READ_OP 1
872 #define CIFS_WRITE_OP 2
874 /* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's lock list for a lock overlapping [offset,
 * offset+length). Same-owner locks through the same fid are allowed
 * except that a shared lock still blocks a write op; shared locks only
 * conflict with exclusive requests from other owners/fids.
 * NOTE(review): the `continue`, conf_lock assignment and return
 * statements are elided from this listing.
 */
876 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
877 __u64 length, __u8 type, struct cifsFileInfo *cfile,
878 struct cifsLockInfo **conf_lock, int rw_check)
880 struct cifsLockInfo *li;
881 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
882 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
884 list_for_each_entry(li, &fdlocks->locks, llist) {
885 if (offset + length <= li->offset ||
886 offset >= li->offset + li->length)
888 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
889 server->ops->compare_fids(cfile, cur_cfile)) {
890 /* shared lock prevents write op through the same fid */
891 if (!(li->type & server->vals->shared_lock_type) ||
892 rw_check != CIFS_WRITE_OP)
895 if ((type & server->vals->shared_lock_type) &&
896 ((server->ops->compare_fids(cfile, cur_cfile) &&
897 current->tgid == li->pid) || type == li->type))
/*
 * Check all fids open on the inode for a conflicting byte-range lock
 * by delegating to cifs_find_fid_lock_conflict for each fid's list.
 * Caller must hold cinode->lock_sem.
 * NOTE(review): the early break on conflict and return are elided
 * from this listing.
 */
907 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
908 __u8 type, struct cifsLockInfo **conf_lock,
912 struct cifs_fid_locks *cur;
913 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
915 list_for_each_entry(cur, &cinode->llist, llist) {
916 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
917 cfile, conf_lock, rw_check);
926 * Check if there is another lock that prevents us to set the lock (mandatory
927 * style). If such a lock exists, update the flock structure with its
928 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
929 * or leave it the same if we can't. Returns 0 if we don't need to request to
930 * the server or 1 otherwise.
933 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
934 __u8 type, struct file_lock *flock)
937 struct cifsLockInfo *conf_lock;
938 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
939 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
942 down_read(&cinode->lock_sem);
944 exist = cifs_find_lock_conflict(cfile, offset, length, type,
945 &conf_lock, CIFS_LOCK_OP);
/* Conflict found: report the blocking lock's range/owner/type back
 * through the flock structure. */
947 flock->fl_start = conf_lock->offset;
948 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
949 flock->fl_pid = conf_lock->pid;
950 if (conf_lock->type & server->vals->shared_lock_type)
951 flock->fl_type = F_RDLCK;
953 flock->fl_type = F_WRLCK;
954 } else if (!cinode->can_cache_brlcks)
957 flock->fl_type = F_UNLCK;
959 up_read(&cinode->lock_sem);
/*
 * Append a lock record to this fid's lock list under the inode's
 * lock_sem (write).
 */
964 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
966 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
967 down_write(&cinode->lock_sem);
968 list_add_tail(&lock->llist, &cfile->llist->locks);
969 up_write(&cinode->lock_sem);
973 * Set the byte-range lock (mandatory style). Returns:
974 * 1) 0, if we set the lock and don't need to request to the server;
975 * 2) 1, if no locks prevent us but we need to request to the server;
976 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
979 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
982 struct cifsLockInfo *conf_lock;
983 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
/* NOTE(review): the retry loop head, `wait` handling and several rc
 * paths are elided from this listing. */
989 down_write(&cinode->lock_sem);
991 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
992 lock->type, &conf_lock, CIFS_LOCK_OP);
993 if (!exist && cinode->can_cache_brlcks) {
994 list_add_tail(&lock->llist, &cfile->llist->locks);
995 up_write(&cinode->lock_sem);
/* Conflict while willing to wait: queue on the blocker's blist and
 * sleep until cifs_del_lock_waiters unlinks us (empty blist). */
1004 list_add_tail(&lock->blist, &conf_lock->blist);
1005 up_write(&cinode->lock_sem);
1006 rc = wait_event_interruptible(lock->block_q,
1007 (lock->blist.prev == &lock->blist) &&
1008 (lock->blist.next == &lock->blist));
1011 down_write(&cinode->lock_sem);
1012 list_del_init(&lock->blist);
1015 up_write(&cinode->lock_sem);
1020 * Check if there is another lock that prevents us to set the lock (posix
1021 * style). If such a lock exists, update the flock structure with its
1022 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1023 * or leave it the same if we can't. Returns 0 if we don't need to request to
1024 * the server or 1 otherwise.
1027 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1030 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1031 unsigned char saved_type = flock->fl_type;
1033 if ((flock->fl_flags & FL_POSIX) == 0)
1036 down_read(&cinode->lock_sem);
1037 posix_test_lock(file, flock);
/* Local test found no conflict but we cannot cache brlocks: restore
 * the caller's requested type so the server is asked. */
1039 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1040 flock->fl_type = saved_type;
1044 up_read(&cinode->lock_sem);
1049 * Set the byte-range lock (posix style). Returns:
1050 * 1) 0, if we set the lock and don't need to request to the server;
1051 * 2) 1, if we need to request to the server;
1052 * 3) <0, if the error occurs while setting the lock.
1055 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1057 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1060 if ((flock->fl_flags & FL_POSIX) == 0)
1064 down_write(&cinode->lock_sem);
1065 if (!cinode->can_cache_brlcks) {
1066 up_write(&cinode->lock_sem);
1070 rc = posix_lock_file(file, flock, NULL);
1071 up_write(&cinode->lock_sem);
/* posix_lock_file deferred the lock: wait for the blocker to go
 * away, unblocking ourselves if interrupted. */
1072 if (rc == FILE_LOCK_DEFERRED) {
1073 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1076 posix_unblock_lock(flock);
/*
 * Send all cached mandatory byte-range locks for this fid to the
 * server, batching up to max_num ranges per LOCKING_ANDX request and
 * making one pass per lock type (exclusive, then shared).
 * NOTE(review): loop-variable declarations, the max_buf zero check,
 * per-batch rc accumulation and the kfree/return tail are elided from
 * this listing.
 */
1082 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1085 int rc = 0, stored_rc;
1086 struct cifsLockInfo *li, *tmp;
1087 struct cifs_tcon *tcon;
1088 unsigned int num, max_num, max_buf;
1089 LOCKING_ANDX_RANGE *buf, *cur;
1090 int types[] = {LOCKING_ANDX_LARGE_FILES,
1091 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1095 tcon = tlink_tcon(cfile->tlink);
1098 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1099 * and check it for zero before using.
1101 max_buf = tcon->ses->server->maxBuf;
/* Fit as many lock ranges as the negotiated buffer allows. */
1107 max_num = (max_buf - sizeof(struct smb_hdr)) /
1108 sizeof(LOCKING_ANDX_RANGE);
1109 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1115 for (i = 0; i < 2; i++) {
1118 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1119 if (li->type != types[i])
1121 cur->Pid = cpu_to_le16(li->pid);
1122 cur->LengthLow = cpu_to_le32((u32)li->length);
1123 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1124 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1125 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
/* Batch full: flush it to the server and restart filling. */
1126 if (++num == max_num) {
1127 stored_rc = cifs_lockv(xid, tcon,
1129 (__u8)li->type, 0, num,
/* Send any partial final batch for this lock type. */
1140 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1141 (__u8)types[i], 0, num, buf);
/*
 * Derive a stable pseudo-pid from a lock owner pointer, mixed with a
 * per-boot secret so owner addresses are not exposed on the wire.
 */
1153 hash_lockowner(fl_owner_t owner)
1155 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1158 struct lock_to_push {
1159 struct list_head llist;
/*
 * Push all cached POSIX (fcntl) locks on this inode to the server:
 * count them under flc_lock, preallocate one lock_to_push per lock
 * (allocation cannot happen under the spinlock), copy the lock
 * parameters across in a second pass, then issue CIFSSMBPosixLock for
 * each outside the lock.
 * NOTE(review): declarations (length, type, stored_rc), the count
 * increment, the allocation-failure path and the error label are
 * elided from this listing.
 */
1168 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1170 struct inode *inode = d_inode(cfile->dentry);
1171 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1172 struct file_lock *flock;
1173 struct file_lock_context *flctx = inode->i_flctx;
1174 unsigned int count = 0, i;
1175 int rc = 0, xid, type;
1176 struct list_head locks_to_send, *el;
1177 struct lock_to_push *lck, *tmp;
1185 spin_lock(&flctx->flc_lock);
1186 list_for_each(el, &flctx->flc_posix) {
1189 spin_unlock(&flctx->flc_lock);
1191 INIT_LIST_HEAD(&locks_to_send);
1194 * Allocating count locks is enough because no FL_POSIX locks can be
1195 * added to the list while we are holding cinode->lock_sem that
1196 * protects locking operations of this inode.
1198 for (i = 0; i < count; i++) {
1199 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1204 list_add_tail(&lck->llist, &locks_to_send);
1207 el = locks_to_send.next;
1208 spin_lock(&flctx->flc_lock);
1209 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1210 if (el == &locks_to_send) {
1212 * The list ended. We don't have enough allocated
1213 * structures - something is really wrong.
1215 cifs_dbg(VFS, "Can't push all brlocks!\n");
1218 length = 1 + flock->fl_end - flock->fl_start;
1219 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1223 lck = list_entry(el, struct lock_to_push, llist);
1224 lck->pid = hash_lockowner(flock->fl_owner);
1225 lck->netfid = cfile->fid.netfid;
1226 lck->length = length;
1228 lck->offset = flock->fl_start;
1230 spin_unlock(&flctx->flc_lock);
/* Now safe to sleep: send each collected lock to the server. */
1232 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1235 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1236 lck->offset, lck->length, NULL,
1240 list_del(&lck->llist);
/* Error path: free any remaining preallocated entries. */
1248 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1249 list_del(&lck->llist);
/*
 * Flush all cached byte-range locks to the server (POSIX path when
 * unix extensions allow it, else mandatory path) and clear
 * can_cache_brlcks so future locks go straight to the server.
 */
1256 cifs_push_locks(struct cifsFileInfo *cfile)
1258 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1259 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1260 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1263 /* we are going to update can_cache_brlcks here - need a write access */
1264 down_write(&cinode->lock_sem);
1265 if (!cinode->can_cache_brlcks) {
1266 up_write(&cinode->lock_sem);
1270 if (cap_unix(tcon->ses) &&
1271 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1272 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1273 rc = cifs_push_posix_locks(cfile);
1275 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1277 cinode->can_cache_brlcks = false;
1278 up_write(&cinode->lock_sem);
/*
 * Decode a struct file_lock into the protocol-specific lock type,
 * lock/unlock intent and wait flag, logging each recognized flag.
 * NOTE(review): the *lock/*unlock/*wait_flag assignments are elided
 * from this listing.
 */
1283 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1284 bool *wait_flag, struct TCP_Server_Info *server)
1286 if (flock->fl_flags & FL_POSIX)
1287 cifs_dbg(FYI, "Posix\n");
1288 if (flock->fl_flags & FL_FLOCK)
1289 cifs_dbg(FYI, "Flock\n");
1290 if (flock->fl_flags & FL_SLEEP) {
1291 cifs_dbg(FYI, "Blocking lock\n");
1294 if (flock->fl_flags & FL_ACCESS)
1295 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1296 if (flock->fl_flags & FL_LEASE)
1297 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1298 if (flock->fl_flags &
1299 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1300 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1301 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1303 *type = server->vals->large_lock_type;
1304 if (flock->fl_type == F_WRLCK) {
1305 cifs_dbg(FYI, "F_WRLCK\n");
1306 *type |= server->vals->exclusive_lock_type;
1308 } else if (flock->fl_type == F_UNLCK) {
1309 cifs_dbg(FYI, "F_UNLCK\n");
1310 *type |= server->vals->unlock_lock_type;
1312 /* Check if unlock includes more than one lock range */
1313 } else if (flock->fl_type == F_RDLCK) {
1314 cifs_dbg(FYI, "F_RDLCK\n");
1315 *type |= server->vals->shared_lock_type;
1317 } else if (flock->fl_type == F_EXLCK) {
1318 cifs_dbg(FYI, "F_EXLCK\n");
1319 *type |= server->vals->exclusive_lock_type;
1321 } else if (flock->fl_type == F_SHLCK) {
1322 cifs_dbg(FYI, "F_SHLCK\n");
1323 *type |= server->vals->shared_lock_type;
1326 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * Service F_GETLK: report whether a conflicting lock exists for the range
 * described by @flock.  Tries the POSIX lock-test path first (when the
 * elided posix_lck branch applies), then the local cached-lock test, and
 * finally probes the server with mandatory lock requests — undoing any
 * lock acquired purely for the test.  NOTE(review): several branch
 * headers/returns are elided from this excerpt.
 */
1330 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1331 bool wait_flag, bool posix_lck, unsigned int xid)
1334 __u64 length = 1 + flock->fl_end - flock->fl_start;
1335 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1336 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1337 struct TCP_Server_Info *server = tcon->ses->server;
1338 __u16 netfid = cfile->fid.netfid;
1341 int posix_lock_type;
/* first try the local (VFS-level) POSIX lock conflict test */
1343 rc = cifs_posix_lock_test(file, flock);
1347 if (type & server->vals->shared_lock_type)
1348 posix_lock_type = CIFS_RDLCK;
1350 posix_lock_type = CIFS_WRLCK;
1351 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1352 hash_lockowner(flock->fl_owner),
1353 flock->fl_start, length, flock,
1354 posix_lock_type, wait_flag);
/* check against locks cached on this client */
1358 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1362 /* BB we could chain these into one lock request BB */
/* probe the server: take the lock, then immediately release it */
1363 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1366 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1368 flock->fl_type = F_UNLCK;
1370 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
/* shared probe: retry with the shared lock type to distinguish readers */
1375 if (type & server->vals->shared_lock_type) {
1376 flock->fl_type = F_WRLCK;
1380 type &= ~server->vals->exclusive_lock_type;
1382 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1383 type | server->vals->shared_lock_type,
1386 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1387 type | server->vals->shared_lock_type, 0, 1, false);
1388 flock->fl_type = F_RDLCK;
1390 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1393 flock->fl_type = F_WRLCK;
/*
 * Move every element from @source onto @dest.  Used to restore saved lock
 * entries to the per-file list when an unlock range request fails.
 */
1399 cifs_move_llist(struct list_head *source, struct list_head *dest)
1401 struct list_head *li, *tmp;
1402 list_for_each_safe(li, tmp, source)
1403 list_move(li, dest);
/*
 * Release a list of cifsLockInfo entries: wake any waiters blocked on each
 * lock, then unlink it.  NOTE(review): the per-entry kfree appears to be
 * elided from this excerpt.
 */
1407 cifs_free_llist(struct list_head *llist)
1409 struct cifsLockInfo *li, *tmp;
1410 list_for_each_entry_safe(li, tmp, llist, llist) {
1411 cifs_del_lock_waiters(li);
1412 list_del(&li->llist);
/*
 * Unlock all cached byte-range locks of this process that fall inside the
 * range described by @flock.  Matching locks are batched into a
 * LOCKING_ANDX_RANGE array (up to max_num per request) and sent with
 * cifs_lockv; entries are parked on tmp_llist so they can be restored to
 * the file's list if the server request fails.  Runs one pass per lock
 * type (exclusive then shared).  NOTE(review): several lines (error
 * checks, returns) are elided from this excerpt.
 */
1418 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1421 int rc = 0, stored_rc;
1422 int types[] = {LOCKING_ANDX_LARGE_FILES,
1423 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1425 unsigned int max_num, num, max_buf;
1426 LOCKING_ANDX_RANGE *buf, *cur;
1427 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1428 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1429 struct cifsLockInfo *li, *tmp;
1430 __u64 length = 1 + flock->fl_end - flock->fl_start;
1431 struct list_head tmp_llist;
1433 INIT_LIST_HEAD(&tmp_llist);
1436 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1437 * and check it for zero before using.
1439 max_buf = tcon->ses->server->maxBuf;
/* how many lock ranges fit in one SMB request buffer */
1443 max_num = (max_buf - sizeof(struct smb_hdr)) /
1444 sizeof(LOCKING_ANDX_RANGE);
1445 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1449 down_write(&cinode->lock_sem);
1450 for (i = 0; i < 2; i++) {
1453 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* skip locks not fully contained in the requested range */
1454 if (flock->fl_start > li->offset ||
1455 (flock->fl_start + length) <
1456 (li->offset + li->length))
1458 if (current->tgid != li->pid)
1460 if (types[i] != li->type)
1462 if (cinode->can_cache_brlcks) {
1464 * We can cache brlock requests - simply remove
1465 * a lock from the file's list.
1467 list_del(&li->llist);
1468 cifs_del_lock_waiters(li);
/* fill the next LOCKING_ANDX_RANGE slot for this lock */
1472 cur->Pid = cpu_to_le16(li->pid);
1473 cur->LengthLow = cpu_to_le32((u32)li->length);
1474 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1475 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1476 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1478 * We need to save a lock here to let us add it again to
1479 * the file's list if the unlock range request fails on
1482 list_move(&li->llist, &tmp_llist);
1483 if (++num == max_num) {
/* buffer full - flush this batch to the server */
1484 stored_rc = cifs_lockv(xid, tcon,
1486 li->type, num, 0, buf);
1489 * We failed on the unlock range
1490 * request - add all locks from the tmp
1491 * list to the head of the file's list.
1493 cifs_move_llist(&tmp_llist,
1494 &cfile->llist->locks)
1498 * The unlock range request succeed -
1499 * free the tmp list.
1501 cifs_free_llist(&tmp_llist);
/* flush any partial batch remaining for this lock type */
1508 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1509 types[i], num, 0, buf);
1511 cifs_move_llist(&tmp_llist,
1512 &cfile->llist->locks);
1515 cifs_free_llist(&tmp_llist);
1519 up_write(&cinode->lock_sem);
/*
 * Service F_SETLK/F_SETLKW: set or clear a byte-range lock.  Takes the
 * POSIX path when posix_lck applies (elided branch), otherwise builds a
 * cifsLockInfo, sends a mandatory lock to the server and caches it, or
 * delegates unlocking to mand_unlock_range.  Finally mirrors the result
 * into the local VFS lock table for FL_POSIX requests.  NOTE(review):
 * several branch headers and returns are elided from this excerpt.
 */
1525 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1526 bool wait_flag, bool posix_lck, int lock, int unlock,
1530 __u64 length = 1 + flock->fl_end - flock->fl_start;
1531 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1532 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1533 struct TCP_Server_Info *server = tcon->ses->server;
1534 struct inode *inode = d_inode(cfile->dentry);
1537 int posix_lock_type;
1539 rc = cifs_posix_lock_set(file, flock);
1543 if (type & server->vals->shared_lock_type)
1544 posix_lock_type = CIFS_RDLCK;
1546 posix_lock_type = CIFS_WRLCK;
1549 posix_lock_type = CIFS_UNLCK;
1551 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1552 hash_lockowner(flock->fl_owner),
1553 flock->fl_start, length,
1554 NULL, posix_lock_type, wait_flag);
1559 struct cifsLockInfo *lock;
1561 lock = cifs_lock_init(flock->fl_start, length, type);
/* check for a conflicting cached lock before going to the server */
1565 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1574 * Windows 7 server can delay breaking lease from read to None
1575 * if we set a byte-range lock on a file - break it explicitly
1576 * before sending the lock to the server to be sure the next
1577 * read won't conflict with non-overlapted locks due to
1580 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1581 CIFS_CACHE_READ(CIFS_I(inode))) {
1582 cifs_zap_mapping(inode);
1583 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1585 CIFS_I(inode)->oplock = 0;
1588 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1589 type, 1, 0, wait_flag);
/* server granted the lock - remember it locally */
1595 cifs_lock_add(cfile, lock);
1597 rc = server->ops->mand_unlock_range(cfile, flock, xid);
/* keep the VFS view of POSIX locks in sync with the server */
1600 if (flock->fl_flags & FL_POSIX && !rc)
1601 rc = locks_lock_file_wait(file, flock);
/*
 * VFS ->lock entry point for CIFS files.  Parses the request with
 * cifs_read_flock, decides whether POSIX semantics apply (Unix-capable
 * server, "noposixbrl" not set), then dispatches to cifs_getlk for
 * F_GETLK or cifs_setlk for set/clear requests.  NOTE(review): some
 * declarations/returns are elided from this excerpt.
 */
1605 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1608 int lock = 0, unlock = 0;
1609 bool wait_flag = false;
1610 bool posix_lck = false;
1611 struct cifs_sb_info *cifs_sb;
1612 struct cifs_tcon *tcon;
1613 struct cifsInodeInfo *cinode;
1614 struct cifsFileInfo *cfile;
1621 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1622 cmd, flock->fl_flags, flock->fl_type,
1623 flock->fl_start, flock->fl_end);
1625 cfile = (struct cifsFileInfo *)file->private_data;
1626 tcon = tlink_tcon(cfile->tlink);
1628 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1631 cifs_sb = CIFS_FILE_SB(file);
1632 netfid = cfile->fid.netfid;
1633 cinode = CIFS_I(file_inode(file));
/* POSIX byte-range locks require Unix extensions and no "noposixbrl" */
1635 if (cap_unix(tcon->ses) &&
1636 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1637 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1640 * BB add code here to normalize offset and length to account for
1641 * negative length which we can not accept over the wire.
1643 if (IS_GETLK(cmd)) {
1644 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1649 if (!lock && !unlock) {
1651 * if no lock or unlock then nothing to do since we do not
1658 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1665 * update the file size (if needed) after a write. Should be called with
1666 * the inode->i_lock held
1669 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1670 unsigned int bytes_written)
1672 loff_t end_of_write = offset + bytes_written;
/* only ever grow the cached server EOF, never shrink it */
1674 if (end_of_write > cifsi->server_eof)
1675 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * *@offset through @open_file, retrying on -EAGAIN (reopening a stale
 * handle if needed) and chunking each request to the server's retry size.
 * On success advances *@offset, updates the cached EOF/i_size, and returns
 * the number of bytes written.  NOTE(review): some error-path lines are
 * elided from this excerpt.
 */
1679 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1680 size_t write_size, loff_t *offset)
1683 unsigned int bytes_written = 0;
1684 unsigned int total_written;
1685 struct cifs_sb_info *cifs_sb;
1686 struct cifs_tcon *tcon;
1687 struct TCP_Server_Info *server;
1689 struct dentry *dentry = open_file->dentry;
1690 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1691 struct cifs_io_parms io_parms;
1693 cifs_sb = CIFS_SB(dentry->d_sb);
1695 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1696 write_size, *offset, dentry);
1698 tcon = tlink_tcon(open_file->tlink);
1699 server = tcon->ses->server;
/* protocol-specific sync write op is mandatory for this path */
1701 if (!server->ops->sync_write)
1706 for (total_written = 0; write_size > total_written;
1707 total_written += bytes_written) {
1709 while (rc == -EAGAIN) {
1713 if (open_file->invalidHandle) {
1714 /* we could deadlock if we called
1715 filemap_fdatawait from here so tell
1716 reopen_file not to flush data to
1718 rc = cifs_reopen_file(open_file, false);
/* cap each wire request at the server's retry size */
1723 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1724 (unsigned int)write_size - total_written);
1725 /* iov[0] is reserved for smb header */
1726 iov[1].iov_base = (char *)write_data + total_written;
1727 iov[1].iov_len = len;
1729 io_parms.tcon = tcon;
1730 io_parms.offset = *offset;
1731 io_parms.length = len;
1732 rc = server->ops->sync_write(xid, &open_file->fid,
1733 &io_parms, &bytes_written, iov, 1);
1735 if (rc || (bytes_written == 0)) {
/* successful chunk: bump server EOF and the caller's offset */
1743 spin_lock(&d_inode(dentry)->i_lock);
1744 cifs_update_eof(cifsi, *offset, bytes_written);
1745 spin_unlock(&d_inode(dentry)->i_lock);
1746 *offset += bytes_written;
1750 cifs_stats_bytes_written(tcon, total_written);
1752 if (total_written > 0) {
1753 spin_lock(&d_inode(dentry)->i_lock);
1754 if (*offset > d_inode(dentry)->i_size)
1755 i_size_write(d_inode(dentry), *offset);
1756 spin_unlock(&d_inode(dentry)->i_lock);
1758 mark_inode_dirty_sync(d_inode(dentry));
1760 return total_written;
/*
 * Find an open handle on @cifs_inode that allows reading, take a reference
 * on it, and return it (NULL if none found — per the elided tail).  When
 * @fsuid_only is set on a multiuser mount, only handles owned by the
 * current fsuid are considered.
 */
1763 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1766 struct cifsFileInfo *open_file = NULL;
1767 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1768 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1770 /* only filter by fsuid on multiuser mounts */
1771 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1774 spin_lock(&tcon->open_file_lock);
1775 /* we could simply get the first_list_entry since write-only entries
1776 are always at the end of the list but since the first entry might
1777 have a close pending, we go through the whole list */
1778 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1779 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1781 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1782 if (!open_file->invalidHandle) {
1783 /* found a good file */
1784 /* lock it so it will not be closed on us */
1785 cifsFileInfo_get(open_file);
1786 spin_unlock(&tcon->open_file_lock);
1788 } /* else might as well continue, and look for
1789 another, or simply have the caller reopen it
1790 again rather than trying to fix this handle */
1791 } else /* write only file */
1792 break; /* write only files are last so must be done */
1794 spin_unlock(&tcon->open_file_lock);
/*
 * Find (and reference) an open handle on @cifs_inode usable for writing.
 * Prefers a valid handle owned by the current thread group; if none, it
 * retries accepting any owner, and as a last resort attempts to reopen an
 * invalidated handle (bounded by MAX_REOPEN_ATT attempts).  NOTE(review):
 * labels, returns and some error handling are elided from this excerpt.
 */
1798 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1801 struct cifsFileInfo *open_file, *inv_file = NULL;
1802 struct cifs_sb_info *cifs_sb;
1803 struct cifs_tcon *tcon;
1804 bool any_available = false;
1806 unsigned int refind = 0;
1808 /* Having a null inode here (because mapping->host was set to zero by
1809 the VFS or MM) should not happen but we had reports of on oops (due to
1810 it being zero) during stress testcases so we need to check for it */
1812 if (cifs_inode == NULL) {
1813 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1818 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1819 tcon = cifs_sb_master_tcon(cifs_sb);
1821 /* only filter by fsuid on multiuser mounts */
1822 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1825 spin_lock(&tcon->open_file_lock);
/* give up after too many reopen attempts */
1827 if (refind > MAX_REOPEN_ATT) {
1828 spin_unlock(&tcon->open_file_lock);
1831 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1832 if (!any_available && open_file->pid != current->tgid)
1834 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1836 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1837 if (!open_file->invalidHandle) {
1838 /* found a good writable file */
1839 cifsFileInfo_get(open_file);
1840 spin_unlock(&tcon->open_file_lock);
/* remember an invalidated handle as a reopen candidate */
1844 inv_file = open_file;
1848 /* couldn't find useable FH with same pid, try any available */
1849 if (!any_available) {
1850 any_available = true;
1851 goto refind_writable;
1855 any_available = false;
1856 cifsFileInfo_get(inv_file);
1859 spin_unlock(&tcon->open_file_lock);
1862 rc = cifs_reopen_file(inv_file, false);
/* reopen failed - move the handle to list tail and drop our ref */
1866 spin_lock(&tcon->open_file_lock);
1867 list_move_tail(&inv_file->flist,
1868 &cifs_inode->openFileList);
1869 spin_unlock(&tcon->open_file_lock);
1870 cifsFileInfo_put(inv_file);
1873 spin_lock(&tcon->open_file_lock);
1874 goto refind_writable;
/*
 * Write the [from, to) byte range of @page back to the server using any
 * writable handle for the inode.  Clamps the range so the write never
 * extends the file, and treats a page fully beyond EOF (racing truncate)
 * as a no-op.  NOTE(review): kunmap/return lines are elided from this
 * excerpt.
 */
1881 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1883 struct address_space *mapping = page->mapping;
1884 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1887 int bytes_written = 0;
1888 struct inode *inode;
1889 struct cifsFileInfo *open_file;
1891 if (!mapping || !mapping->host)
1894 inode = page->mapping->host;
1896 offset += (loff_t)from;
1897 write_data = kmap(page);
/* reject nonsensical ranges */
1900 if ((to > PAGE_SIZE) || (from > to)) {
1905 /* racing with truncate? */
1906 if (offset > mapping->host->i_size) {
1908 return 0; /* don't care */
1911 /* check to make sure that we are not extending the file */
1912 if (mapping->host->i_size - offset < (loff_t)to)
1913 to = (unsigned)(mapping->host->i_size - offset);
1915 open_file = find_writable_file(CIFS_I(mapping->host), false);
1917 bytes_written = cifs_write(open_file, open_file->pid,
1918 write_data, to - from, &offset);
1919 cifsFileInfo_put(open_file);
1920 /* Does mm or vfs already set times? */
1921 inode->i_atime = inode->i_mtime = current_time(inode);
1922 if ((bytes_written > 0) && (offset))
1924 else if (bytes_written < 0)
1927 cifs_dbg(FYI, "No writeable filehandles for inode\n");
/*
 * Allocate a cifs_writedata able to hold up to @tofind pages and populate
 * its page array with dirty pages from @mapping starting at *@index, up
 * to @end.  *@found_pages accumulates how many were gathered.  The lookup
 * loops because find_get_pages_tag returns a bounded batch per call.
 */
1935 static struct cifs_writedata *
1936 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1937 pgoff_t end, pgoff_t *index,
1938 unsigned int *found_pages)
1940 unsigned int nr_pages;
1941 struct page **pages;
1942 struct cifs_writedata *wdata;
1944 wdata = cifs_writedata_alloc((unsigned int)tofind,
1945 cifs_writev_complete);
1950 * find_get_pages_tag seems to return a max of 256 on each
1951 * iteration, so we must call it several times in order to
1952 * fill the array or the wsize is effectively limited to
1956 pages = wdata->pages;
1958 nr_pages = find_get_pages_tag(mapping, index,
1959 PAGECACHE_TAG_DIRTY, tofind,
1961 *found_pages += nr_pages;
1964 } while (nr_pages && tofind && *index <= end);
/*
 * Lock and validate the @found_pages candidate pages in @wdata, keeping
 * only a consecutive run of dirty, in-mapping, in-range pages and marking
 * each kept page writeback.  Pages that fail a check are skipped; unused
 * trailing pages are released.  Returns (per the elided tail) the number
 * of pages ready to send.  NOTE(review): some unlock/continue lines are
 * elided from this excerpt.
 */
1970 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1971 struct address_space *mapping,
1972 struct writeback_control *wbc,
1973 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1975 unsigned int nr_pages = 0, i;
1978 for (i = 0; i < found_pages; i++) {
1979 page = wdata->pages[i];
1981 * At this point we hold neither mapping->tree_lock nor
1982 * lock on the page itself: the page may be truncated or
1983 * invalidated (changing page->mapping to NULL), or even
1984 * swizzled back from swapper_space to tmpfs file
1990 else if (!trylock_page(page))
1993 if (unlikely(page->mapping != mapping)) {
1998 if (!wbc->range_cyclic && page->index > end) {
2004 if (*next && (page->index != *next)) {
2005 /* Not next consecutive page */
2010 if (wbc->sync_mode != WB_SYNC_NONE)
2011 wait_on_page_writeback(page);
2013 if (PageWriteback(page) ||
2014 !clear_page_dirty_for_io(page)) {
2020 * This actually clears the dirty bit in the radix tree.
2021 * See cifs_writepage() for more commentary.
2023 set_page_writeback(page);
/* page entirely past EOF - nothing to write for it */
2024 if (page_offset(page) >= i_size_read(mapping->host)) {
2027 end_page_writeback(page);
2031 wdata->pages[i] = page;
2032 *next = page->index + 1;
2036 /* reset index to refind any pages skipped */
2038 *index = wdata->pages[0]->index + 1;
2040 /* put any pages we aren't going to use */
2041 for (i = nr_pages; i < found_pages; i++) {
2042 put_page(wdata->pages[i]);
2043 wdata->pages[i] = NULL;
/*
 * Fill in the remaining cifs_writedata fields (offset, sizes, target
 * file handle) and submit the async write via the server's async_writev
 * op.  On failure the pages are unlocked (per the visible error loop).
 * NOTE(review): some lines, including the final return, are elided.
 */
2050 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2051 struct address_space *mapping, struct writeback_control *wbc)
2054 struct TCP_Server_Info *server;
2057 wdata->sync_mode = wbc->sync_mode;
2058 wdata->nr_pages = nr_pages;
2059 wdata->offset = page_offset(wdata->pages[0]);
2060 wdata->pagesz = PAGE_SIZE;
/* last page may be partial: bound its size by the inode size */
2061 wdata->tailsz = min(i_size_read(mapping->host) -
2062 page_offset(wdata->pages[nr_pages - 1]),
2064 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2066 if (wdata->cfile != NULL)
2067 cifsFileInfo_put(wdata->cfile);
2068 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2069 if (!wdata->cfile) {
2070 cifs_dbg(VFS, "No writable handles for inode\n");
2073 wdata->pid = wdata->cfile->pid;
2074 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2075 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2078 for (i = 0; i < nr_pages; ++i)
2079 unlock_page(wdata->pages[i]);
/*
 * address_space ->writepages for CIFS: gather runs of dirty pages into
 * cifs_writedata batches (credit-limited by wait_mtu_credits) and submit
 * them asynchronously.  Falls back to generic_writepages when wsize is
 * smaller than a page.  On send failure the pages are redirtied or
 * errored and, for WB_SYNC_ALL with -EAGAIN, the batch is retried from
 * the saved index.  NOTE(review): several declarations and labels are
 * elided from this excerpt.
 */
2084 static int cifs_writepages(struct address_space *mapping,
2085 struct writeback_control *wbc)
2087 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2088 struct TCP_Server_Info *server;
2089 bool done = false, scanned = false, range_whole = false;
2091 struct cifs_writedata *wdata;
2095 * If wsize is smaller than the page cache size, default to writing
2096 * one page at a time via cifs_writepage
2098 if (cifs_sb->wsize < PAGE_SIZE)
2099 return generic_writepages(mapping, wbc);
2101 if (wbc->range_cyclic) {
2102 index = mapping->writeback_index; /* Start from prev offset */
2105 index = wbc->range_start >> PAGE_SHIFT;
2106 end = wbc->range_end >> PAGE_SHIFT;
2107 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2111 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2113 while (!done && index <= end) {
2114 unsigned int i, nr_pages, found_pages, wsize, credits;
2115 pgoff_t next = 0, tofind, saved_index = index;
/* reserve send credits before building each batch */
2117 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2122 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2124 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2128 add_credits_and_wake_if(server, credits, 0);
2132 if (found_pages == 0) {
2133 kref_put(&wdata->refcount, cifs_writedata_release);
2134 add_credits_and_wake_if(server, credits, 0);
2138 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2139 end, &index, &next, &done);
2141 /* nothing to write? */
2142 if (nr_pages == 0) {
2143 kref_put(&wdata->refcount, cifs_writedata_release);
2144 add_credits_and_wake_if(server, credits, 0);
2148 wdata->credits = credits;
2150 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2152 /* send failure -- clean up the mess */
2154 add_credits_and_wake_if(server, wdata->credits, 0);
2155 for (i = 0; i < nr_pages; ++i) {
2157 redirty_page_for_writepage(wbc,
2160 SetPageError(wdata->pages[i]);
2161 end_page_writeback(wdata->pages[i]);
2162 put_page(wdata->pages[i]);
2165 mapping_set_error(mapping, rc);
2167 kref_put(&wdata->refcount, cifs_writedata_release);
2169 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2170 index = saved_index;
2174 wbc->nr_to_write -= nr_pages;
2175 if (wbc->nr_to_write <= 0)
2181 if (!scanned && !done) {
2183 * We hit the last page and there is more work to be done: wrap
2184 * back to the start of the file
2191 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2192 mapping->writeback_index = index;
/*
 * Write back one locked page via cifs_partialpagewrite.  Marks the page
 * writeback first, then on -EAGAIN either retries (WB_SYNC_ALL, per the
 * elided branch) or redirties the page; on success marks it uptodate and
 * ends writeback.
 */
2198 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2204 /* BB add check for wbc flags */
2206 if (!PageUptodate(page))
2207 cifs_dbg(FYI, "ppw - page not up to date\n");
2210 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2212 * A writepage() implementation always needs to do either this,
2213 * or re-dirty the page with "redirty_page_for_writepage()" in
2214 * the case of a failure.
2216 * Just unlocking the page will cause the radix tree tag-bits
2217 * to fail to update with the state of the page correctly.
2219 set_page_writeback(page);
2221 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2222 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2224 else if (rc == -EAGAIN)
2225 redirty_page_for_writepage(wbc, page);
2229 SetPageUptodate(page);
2230 end_page_writeback(page);
/* ->writepage: delegate to cifs_writepage_locked (unlock is elided here) */
2236 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2238 int rc = cifs_writepage_locked(page, wbc);
/*
 * ->write_end: complete a buffered write of @copied bytes at @pos.  For
 * uptodate pages the page is simply dirtied (elided path); for partially
 * uptodate pages the copied bytes are pushed straight to the server via
 * cifs_write.  The PID used for the wire write honors the "forceuid"-style
 * RWPIDFORWARD mount flag.  i_size is grown under i_lock if the write
 * extended the file.  NOTE(review): some lines (unlock, put_page, return)
 * are elided from this excerpt.
 */
2243 static int cifs_write_end(struct file *file, struct address_space *mapping,
2244 loff_t pos, unsigned len, unsigned copied,
2245 struct page *page, void *fsdata)
2248 struct inode *inode = mapping->host;
2249 struct cifsFileInfo *cfile = file->private_data;
2250 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* pick the pid to stamp on the wire request */
2253 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2256 pid = current->tgid;
2258 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2261 if (PageChecked(page)) {
2263 SetPageUptodate(page);
2264 ClearPageChecked(page);
2265 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2266 SetPageUptodate(page);
2268 if (!PageUptodate(page)) {
2270 unsigned offset = pos & (PAGE_SIZE - 1);
2274 /* this is probably better than directly calling
2275 partialpage_write since in this function the file handle is
2276 known which we might as well leverage */
2277 /* BB check if anything else missing out of ppw
2278 such as updating last write time */
2279 page_data = kmap(page);
2280 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2281 /* if (rc < 0) should we set writebehind rc? */
2288 set_page_dirty(page);
2292 spin_lock(&inode->i_lock);
2293 if (pos > inode->i_size)
2294 i_size_write(inode, pos);
2295 spin_unlock(&inode->i_lock);
/*
 * fsync for strict cache mode: flush dirty pages for [start, end], zap
 * the page cache when we no longer hold a read cache (oplock/lease), and
 * ask the server to flush unless the "nostrictsync" mount flag is set.
 * NOTE(review): xid setup, inode_lock and return are elided from this
 * excerpt.
 */
2304 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2309 struct cifs_tcon *tcon;
2310 struct TCP_Server_Info *server;
2311 struct cifsFileInfo *smbfile = file->private_data;
2312 struct inode *inode = file_inode(file);
2313 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2315 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2322 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2325 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2326 rc = cifs_zap_mapping(inode);
2328 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2329 rc = 0; /* don't care about it in fsync */
2333 tcon = tlink_tcon(smbfile->tlink);
2334 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2335 server = tcon->ses->server;
2336 if (server->ops->flush)
2337 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2343 inode_unlock(inode);
/*
 * fsync for non-strict cache modes: flush dirty pages for [start, end]
 * and ask the server to flush the handle unless "nostrictsync" is set.
 * Same shape as cifs_strict_fsync but without the cache invalidation.
 * NOTE(review): xid setup, inode_lock and return are elided.
 */
2347 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2351 struct cifs_tcon *tcon;
2352 struct TCP_Server_Info *server;
2353 struct cifsFileInfo *smbfile = file->private_data;
2354 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2355 struct inode *inode = file->f_mapping->host;
2357 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2364 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2367 tcon = tlink_tcon(smbfile->tlink);
2368 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2369 server = tcon->ses->server;
2370 if (server->ops->flush)
2371 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2377 inode_unlock(inode);
2382 * As file closes, flush all cached write data for this inode checking
2383 * for write behind errors.
2385 int cifs_flush(struct file *file, fl_owner_t id)
2387 struct inode *inode = file_inode(file);
/* only writable opens can have dirty pages to flush */
2390 if (file->f_mode & FMODE_WRITE)
2391 rc = filemap_write_and_wait(inode->i_mapping);
2393 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
/*
 * Allocate @num_pages highmem-capable pages into @pages.  On allocation
 * failure (elided branch) the count of pages already obtained is recorded
 * and ENOMEM is returned; the trailing loop frees pages on the error path.
 */
2399 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2404 for (i = 0; i < num_pages; i++) {
2405 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2408 * save number of pages we have already allocated and
2409 * return with ENOMEM error
2418 for (i = 0; i < num_pages; i++)
/*
 * Clamp @len to @wsize (stored via *cur_len, per the elided tail) and
 * return how many pages that chunk spans.
 */
2425 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2430 clen = min_t(const size_t, len, wsize);
2431 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * kref release for uncached (direct) writedata: drop the page references
 * this path took, then fall through to the common writedata release.
 */
2440 cifs_uncached_writedata_release(struct kref *refcount)
2443 struct cifs_writedata *wdata = container_of(refcount,
2444 struct cifs_writedata, refcount);
2446 for (i = 0; i < wdata->nr_pages; i++)
2447 put_page(wdata->pages[i]);
2448 cifs_writedata_release(refcount);
/*
 * Work handler run when an uncached async write finishes: update the
 * cached server EOF / i_size under i_lock, signal the waiter, and drop
 * the work's reference on the writedata.
 */
2452 cifs_uncached_writev_complete(struct work_struct *work)
2454 struct cifs_writedata *wdata = container_of(work,
2455 struct cifs_writedata, work);
2456 struct inode *inode = d_inode(wdata->cfile->dentry);
2457 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2459 spin_lock(&inode->i_lock);
2460 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2461 if (cifsi->server_eof > inode->i_size)
2462 i_size_write(inode, cifsi->server_eof);
2463 spin_unlock(&inode->i_lock);
2465 complete(&wdata->done);
2467 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Copy up to *@len bytes from the user iterator @from into @wdata's pages
 * (at most *@num_pages of them).  On return *@len is the number of bytes
 * actually copied and *@num_pages the number of pages used (per the
 * elided tail).  A short copy stops early; copying nothing at all is
 * treated as -EFAULT (bogus iovec address).
 */
2471 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2472 size_t *len, unsigned long *num_pages)
2474 size_t save_len, copied, bytes, cur_len = *len;
2475 unsigned long i, nr_pages = *num_pages;
2478 for (i = 0; i < nr_pages; i++) {
2479 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2480 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2483 * If we didn't copy as much as we expected, then that
2484 * may mean we trod into an unmapped area. Stop copying
2485 * at that point. On the next pass through the big
2486 * loop, we'll likely end up getting a zero-length
2487 * write and bailing out of it.
2492 cur_len = save_len - cur_len;
2496 * If we have no data to send, then that probably means that
2497 * the copy above failed altogether. That's most likely because
2498 * the address in the iovec was bogus. Return -EFAULT and let
2499 * the caller free anything we allocated and bail out.
2505 * i + 1 now represents the number of pages we actually used in
2506 * the copy phase above.
/*
 * Break an uncached write of @len bytes at @offset into credit-limited
 * cifs_writedata chunks: allocate pages, copy user data in, and submit
 * each chunk via async_writev, queueing successful sends on @wdata_list
 * for the caller to wait on.  On -EAGAIN the iterator is rewound so the
 * chunk can be resent.  NOTE(review): the loop header and some error
 * lines are elided from this excerpt.
 */
2513 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2514 struct cifsFileInfo *open_file,
2515 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2519 unsigned long nr_pages, num_pages, i;
2520 struct cifs_writedata *wdata;
2521 struct iov_iter saved_from = *from;
2522 loff_t saved_offset = offset;
2524 struct TCP_Server_Info *server;
2526 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2527 pid = open_file->pid;
2529 pid = current->tgid;
2531 server = tlink_tcon(open_file->tlink)->ses->server;
2534 unsigned int wsize, credits;
/* reserve credits sized to the mount's wsize before each chunk */
2536 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2541 nr_pages = get_numpages(wsize, len, &cur_len);
2542 wdata = cifs_writedata_alloc(nr_pages,
2543 cifs_uncached_writev_complete);
2546 add_credits_and_wake_if(server, credits, 0);
2550 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2553 add_credits_and_wake_if(server, credits, 0);
2557 num_pages = nr_pages;
2558 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2560 for (i = 0; i < nr_pages; i++)
2561 put_page(wdata->pages[i]);
2563 add_credits_and_wake_if(server, credits, 0);
2568 * Bring nr_pages down to the number of pages we actually used,
2569 * and free any pages that we didn't use.
2571 for ( ; nr_pages > num_pages; nr_pages--)
2572 put_page(wdata->pages[nr_pages - 1]);
2574 wdata->sync_mode = WB_SYNC_ALL;
2575 wdata->nr_pages = nr_pages;
2576 wdata->offset = (__u64)offset;
2577 wdata->cfile = cifsFileInfo_get(open_file);
2579 wdata->bytes = cur_len;
2580 wdata->pagesz = PAGE_SIZE;
2581 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2582 wdata->credits = credits;
/* reopen a stale handle before sending, then submit asynchronously */
2584 if (!wdata->cfile->invalidHandle ||
2585 !cifs_reopen_file(wdata->cfile, false))
2586 rc = server->ops->async_writev(wdata,
2587 cifs_uncached_writedata_release);
2589 add_credits_and_wake_if(server, wdata->credits, 0);
2590 kref_put(&wdata->refcount,
2591 cifs_uncached_writedata_release);
2592 if (rc == -EAGAIN) {
/* rewind the iterator to this chunk's start and retry */
2594 iov_iter_advance(from, offset - saved_offset);
2600 list_add_tail(&wdata->list, wdata_list);
/*
 * Uncached (O_DIRECT-style) write_iter: fan the request out with
 * cifs_write_from_iter, then collect completions in offset order,
 * resending any chunk that failed with -EAGAIN.  On success advances
 * ki_pos, invalidates the page cache mapping, and returns the total
 * bytes written.  NOTE(review): some declarations and error returns are
 * elided from this excerpt.
 */
2608 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2610 struct file *file = iocb->ki_filp;
2611 ssize_t total_written = 0;
2612 struct cifsFileInfo *open_file;
2613 struct cifs_tcon *tcon;
2614 struct cifs_sb_info *cifs_sb;
2615 struct cifs_writedata *wdata, *tmp;
2616 struct list_head wdata_list;
2617 struct iov_iter saved_from = *from;
2621 * BB - optimize the way when signing is disabled. We can drop this
2622 * extra memory-to-memory copying and use iovec buffers for constructing
2626 rc = generic_write_checks(iocb, from);
2630 INIT_LIST_HEAD(&wdata_list);
2631 cifs_sb = CIFS_FILE_SB(file);
2632 open_file = file->private_data;
2633 tcon = tlink_tcon(open_file->tlink);
2635 if (!tcon->ses->server->ops->async_writev)
2638 rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2639 open_file, cifs_sb, &wdata_list);
2642 * If at least one write was successfully sent, then discard any rc
2643 * value from the later writes. If the other write succeeds, then
2644 * we'll end up returning whatever was written. If it fails, then
2645 * we'll get a new rc value from that.
2647 if (!list_empty(&wdata_list))
2651 * Wait for and collect replies for any successful sends in order of
2652 * increasing offset. Once an error is hit or we get a fatal signal
2653 * while waiting, then return without waiting for any more replies.
2656 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2658 /* FIXME: freezable too? */
2659 rc = wait_for_completion_killable(&wdata->done);
2662 else if (wdata->result)
2665 total_written += wdata->bytes;
2667 /* resend call if it's a retryable error */
2668 if (rc == -EAGAIN) {
2669 struct list_head tmp_list;
2670 struct iov_iter tmp_from = saved_from;
2672 INIT_LIST_HEAD(&tmp_list);
2673 list_del_init(&wdata->list);
/* position the fresh iterator at this chunk's offset */
2675 iov_iter_advance(&tmp_from,
2676 wdata->offset - iocb->ki_pos);
2678 rc = cifs_write_from_iter(wdata->offset,
2679 wdata->bytes, &tmp_from,
2680 open_file, cifs_sb, &tmp_list);
2682 list_splice(&tmp_list, &wdata_list);
2684 kref_put(&wdata->refcount,
2685 cifs_uncached_writedata_release);
2689 list_del_init(&wdata->list);
2690 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2693 if (unlikely(!total_written))
2696 iocb->ki_pos += total_written;
/* stale cached pages must be refetched after a direct write */
2697 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2698 cifs_stats_bytes_written(tcon, total_written);
2699 return total_written;
/*
 * Cached write used when we hold a write oplock/lease: under lock_sem
 * (read) verify no cached mandatory brlock conflicts with the write
 * range, then perform a normal generic buffered write and sync it.
 * NOTE(review): the inode_lock and some error branches are elided from
 * this excerpt.
 */
2703 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2705 struct file *file = iocb->ki_filp;
2706 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2707 struct inode *inode = file->f_mapping->host;
2708 struct cifsInodeInfo *cinode = CIFS_I(inode);
2709 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2713 * We need to hold the sem to be sure nobody modifies lock list
2714 * with a brlock that prevents writing.
2716 down_read(&cinode->lock_sem);
2719 rc = generic_write_checks(iocb, from);
2723 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2724 server->vals->exclusive_lock_type, NULL,
2726 rc = __generic_file_write_iter(iocb, from);
2730 inode_unlock(inode);
2733 rc = generic_write_sync(iocb, rc);
2734 up_read(&cinode->lock_sem);
/*
 * cifs_strict_writev - strict-cache write entry point.
 * With a write oplock (CIFS_CACHE_WRITE) the page cache may be used;
 * otherwise data is sent uncached straight to the server. Writer
 * accounting via cifs_get_writer()/cifs_put_writer() brackets the
 * operation so oplock breaks can wait for in-flight writers.
 */
2739 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2741 struct inode *inode = file_inode(iocb->ki_filp);
2742 struct cifsInodeInfo *cinode = CIFS_I(inode);
2743 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2744 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2745 iocb->ki_filp->private_data;
2746 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2749 written = cifs_get_writer(cinode);
2753 if (CIFS_CACHE_WRITE(cinode)) {
/* POSIX (unix extension) locking without NOPOSIXBRL: cached write ok */
2754 if (cap_unix(tcon->ses) &&
2755 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2756 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2757 written = generic_file_write_iter(iocb, from);
/* mandatory-lock aware cached write */
2760 written = cifs_writev(iocb, from);
2764 * For non-oplocked files in strict cache mode we need to write the data
2765 * to the server exactly from the pos to pos+len-1 rather than flush all
2766 * affected pages because it may cause an error with mandatory locks on
2767 * these pages but not on the region from pos to pos+len-1.
2769 written = cifs_user_writev(iocb, from);
2770 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2772 * Windows 7 server can delay breaking level2 oplock if a write
2773 * request comes - break it on the client to prevent reading
2776 cifs_zap_mapping(inode);
2777 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2782 cifs_put_writer(cinode);
/*
 * cifs_readdata_alloc - allocate a readdata struct with room for
 * @nr_pages trailing page pointers, zeroed, with refcount, list head,
 * completion and work item (@complete) initialised.
 */
2786 static struct cifs_readdata *
2787 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2789 struct cifs_readdata *rdata;
/* single allocation: struct plus page-pointer array tail */
2791 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2793 if (rdata != NULL) {
2794 kref_init(&rdata->refcount);
2795 INIT_LIST_HEAD(&rdata->list);
2796 init_completion(&rdata->done);
2797 INIT_WORK(&rdata->work, complete);
/*
 * cifs_readdata_release - kref release callback: drop the file
 * reference and free the readdata.
 */
2804 cifs_readdata_release(struct kref *refcount)
2806 struct cifs_readdata *rdata = container_of(refcount,
2807 struct cifs_readdata, refcount);
2810 cifsFileInfo_put(rdata->cfile);
/*
 * cifs_read_allocate_pages - populate rdata->pages[] with @nr_pages
 * freshly allocated pages; on failure the second loop below unwinds
 * the pages allocated so far.
 */
2816 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2822 for (i = 0; i < nr_pages; i++) {
2823 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2828 rdata->pages[i] = page;
/* error unwind: release everything and clear the slots */
2832 for (i = 0; i < nr_pages; i++) {
2833 put_page(rdata->pages[i]);
2834 rdata->pages[i] = NULL;
/*
 * cifs_uncached_readdata_release - release variant for uncached reads:
 * drop every page reference, then fall through to the common
 * cifs_readdata_release() for the cfile ref and the struct itself.
 */
2841 cifs_uncached_readdata_release(struct kref *refcount)
2843 struct cifs_readdata *rdata = container_of(refcount,
2844 struct cifs_readdata, refcount);
2847 for (i = 0; i < rdata->nr_pages; i++) {
2848 put_page(rdata->pages[i]);
2849 rdata->pages[i] = NULL;
2851 cifs_readdata_release(refcount);
2855 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2856 * @rdata: the readdata response with list of pages holding data
2857 * @iter: destination for our data
2859 * This function copies data from a list of pages in a readdata response into
2860 * an array of iovecs. It will first calculate where the data should go
2861 * based on the info in the readdata and then copy the data into that spot.
2864 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2866 size_t remaining = rdata->got_bytes;
2869 for (i = 0; i < rdata->nr_pages; i++) {
2870 struct page *page = rdata->pages[i];
/* last page may be partial: copy at most what is left */
2871 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2872 size_t written = copy_page_to_iter(page, 0, copy, iter);
2873 remaining -= written;
/* short copy with room left in the iter means a fault */
2874 if (written < copy && iov_iter_count(iter) > 0)
2877 return remaining ? -EFAULT : 0;
/*
 * cifs_uncached_readv_complete - work callback for an uncached async
 * read: signal the waiter, then drop the work item's reference.
 */
2881 cifs_uncached_readv_complete(struct work_struct *work)
2883 struct cifs_readdata *rdata = container_of(work,
2884 struct cifs_readdata, work);
2886 complete(&rdata->done);
2887 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * cifs_uncached_read_into_pages - receive up to @len bytes from the
 * server socket into rdata's pages. The final partial page is zero
 * padded and recorded in rdata->tailsz. Returns bytes received, or
 * the socket error when nothing was read (except that a partial
 * result is preserved unless the connection aborted).
 */
2891 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2892 struct cifs_readdata *rdata, unsigned int len)
2896 unsigned int nr_pages = rdata->nr_pages;
2898 rdata->got_bytes = 0;
2899 rdata->tailsz = PAGE_SIZE;
2900 for (i = 0; i < nr_pages; i++) {
2901 struct page *page = rdata->pages[i];
2905 /* no need to hold page hostage */
2906 rdata->pages[i] = NULL;
2912 if (len >= PAGE_SIZE) {
2913 /* enough data to fill the page */
/* partial tail page: zero the unused remainder */
2917 zero_user(page, len, PAGE_SIZE - len);
2918 rdata->tailsz = len;
2921 result = cifs_read_page_from_socket(server, page, n);
2925 rdata->got_bytes += result;
2928 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2929 rdata->got_bytes : result;
/*
 * cifs_send_async_read - split an uncached read of @len bytes at
 * @offset into rsize-bounded async read requests, queueing each
 * successfully-issued rdata on @rdata_list for the caller to collect.
 * Credits obtained from wait_mtu_credits() are returned on failure.
 */
2933 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2934 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2936 struct cifs_readdata *rdata;
2937 unsigned int npages, rsize, credits;
2941 struct TCP_Server_Info *server;
2943 server = tlink_tcon(open_file->tlink)->ses->server;
/* pid forwarding mount option: use opener's pid, else current */
2945 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2946 pid = open_file->pid;
2948 pid = current->tgid;
2951 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2956 cur_len = min_t(const size_t, len, rsize);
2957 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2959 /* allocate a readdata struct */
2960 rdata = cifs_readdata_alloc(npages,
2961 cifs_uncached_readv_complete);
/* allocation failed: give back the credits we reserved */
2963 add_credits_and_wake_if(server, credits, 0);
2968 rc = cifs_read_allocate_pages(rdata, npages);
2972 rdata->cfile = cifsFileInfo_get(open_file);
2973 rdata->nr_pages = npages;
2974 rdata->offset = offset;
2975 rdata->bytes = cur_len;
2977 rdata->pagesz = PAGE_SIZE;
2978 rdata->read_into_pages = cifs_uncached_read_into_pages;
2979 rdata->credits = credits;
/* reopen a stale handle before issuing the async read */
2981 if (!rdata->cfile->invalidHandle ||
2982 !cifs_reopen_file(rdata->cfile, true))
2983 rc = server->ops->async_readv(rdata);
2986 add_credits_and_wake_if(server, rdata->credits, 0);
2987 kref_put(&rdata->refcount,
2988 cifs_uncached_readdata_release);
2994 list_add_tail(&rdata->list, rdata_list);
/*
 * cifs_user_readv - uncached read: issue async reads covering the
 * iter, then collect replies in offset order, copying each reply's
 * pages into @to. -EAGAIN results (reconnect) are resent for the
 * remaining byte range; a short read discards what follows.
 */
3002 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3004 struct file *file = iocb->ki_filp;
3007 ssize_t total_read = 0;
3008 loff_t offset = iocb->ki_pos;
3009 struct cifs_sb_info *cifs_sb;
3010 struct cifs_tcon *tcon;
3011 struct cifsFileInfo *open_file;
3012 struct cifs_readdata *rdata, *tmp;
3013 struct list_head rdata_list;
3015 len = iov_iter_count(to);
3019 INIT_LIST_HEAD(&rdata_list);
3020 cifs_sb = CIFS_FILE_SB(file);
3021 open_file = file->private_data;
3022 tcon = tlink_tcon(open_file->tlink);
/* this path requires async read support on the server ops */
3024 if (!tcon->ses->server->ops->async_readv)
3027 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3028 cifs_dbg(FYI, "attempting read on write only file instance\n");
3030 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3032 /* if at least one read request send succeeded, then reset rc */
3033 if (!list_empty(&rdata_list))
3036 len = iov_iter_count(to);
3037 /* the loop below should proceed in the order of increasing offsets */
3039 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3041 /* FIXME: freezable sleep too? */
3042 rc = wait_for_completion_killable(&rdata->done);
3045 else if (rdata->result == -EAGAIN) {
3046 /* resend call if it's a retryable error */
3047 struct list_head tmp_list;
3048 unsigned int got_bytes = rdata->got_bytes;
3050 list_del_init(&rdata->list);
3051 INIT_LIST_HEAD(&tmp_list);
3054 * Got a part of data and then reconnect has
3055 * happened -- fill the buffer and continue
3058 if (got_bytes && got_bytes < rdata->bytes) {
3059 rc = cifs_readdata_to_iov(rdata, to);
3061 kref_put(&rdata->refcount,
3062 cifs_uncached_readdata_release);
/* reissue only the not-yet-received remainder */
3067 rc = cifs_send_async_read(
3068 rdata->offset + got_bytes,
3069 rdata->bytes - got_bytes,
3070 rdata->cfile, cifs_sb,
3073 list_splice(&tmp_list, &rdata_list);
3075 kref_put(&rdata->refcount,
3076 cifs_uncached_readdata_release);
3078 } else if (rdata->result)
3081 rc = cifs_readdata_to_iov(rdata, to);
3083 /* if there was a short read -- discard anything left */
3084 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3087 list_del_init(&rdata->list);
3088 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/* bytes consumed from the iter is the amount actually read */
3091 total_read = len - iov_iter_count(to);
3093 cifs_stats_bytes_read(tcon, total_read);
3095 /* mask nodata case */
3100 iocb->ki_pos += total_read;
/*
 * cifs_strict_readv - strict-cache read entry point. Without a read
 * oplock the read goes uncached to the server; with POSIX (unix
 * extension) locking the generic cached path is safe; otherwise
 * mandatory locks must be checked under lock_sem first.
 */
3107 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3109 struct inode *inode = file_inode(iocb->ki_filp);
3110 struct cifsInodeInfo *cinode = CIFS_I(inode);
3111 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3112 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3113 iocb->ki_filp->private_data;
3114 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3118 * In strict cache mode we need to read from the server all the time
3119 * if we don't have level II oplock because the server can delay mtime
3120 * change - so we can't make a decision about inode invalidating.
3121 * And we can also fail with pagereading if there are mandatory locks
3122 * on pages affected by this read but not on the region from pos to
3125 if (!CIFS_CACHE_READ(cinode))
3126 return cifs_user_readv(iocb, to);
3128 if (cap_unix(tcon->ses) &&
3129 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3130 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3131 return generic_file_read_iter(iocb, to);
3134 * We need to hold the sem to be sure nobody modifies lock list
3135 * with a brlock that prevents reading.
3137 down_read(&cinode->lock_sem);
3138 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3139 tcon->ses->server->vals->shared_lock_type,
3140 NULL, CIFS_READ_OP))
3141 rc = generic_file_read_iter(iocb, to);
3142 up_read(&cinode->lock_sem);
/*
 * cifs_read - synchronous read of @read_size bytes at *@offset into
 * @read_data, looping in rsize-bounded chunks via the server's
 * sync_read op and retrying -EAGAIN after reopening a stale handle.
 * Advances *@offset by the bytes read in each iteration.
 */
3147 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3150 unsigned int bytes_read = 0;
3151 unsigned int total_read;
3152 unsigned int current_read_size;
3154 struct cifs_sb_info *cifs_sb;
3155 struct cifs_tcon *tcon;
3156 struct TCP_Server_Info *server;
3159 struct cifsFileInfo *open_file;
3160 struct cifs_io_parms io_parms;
3161 int buf_type = CIFS_NO_BUFFER;
3165 cifs_sb = CIFS_FILE_SB(file);
3167 /* FIXME: set up handlers for larger reads and/or convert to async */
3168 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3170 if (file->private_data == NULL) {
3175 open_file = file->private_data;
3176 tcon = tlink_tcon(open_file->tlink);
3177 server = tcon->ses->server;
3179 if (!server->ops->sync_read) {
3184 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3185 pid = open_file->pid;
3187 pid = current->tgid;
3189 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3190 cifs_dbg(FYI, "attempting read on write only file instance\n")
3192 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3193 total_read += bytes_read, cur_offset += bytes_read) {
3195 current_read_size = min_t(uint, read_size - total_read,
3198 * For windows me and 9x we do not want to request more
3199 * than it negotiated since it will refuse the read
3202 if ((tcon->ses) && !(tcon->ses->capabilities &
3203 tcon->ses->server->vals->cap_large_files)) {
3204 current_read_size = min_t(uint,
3205 current_read_size, CIFSMaxBufSize);
/* reopen stale handle before retrying the chunk */
3207 if (open_file->invalidHandle) {
3208 rc = cifs_reopen_file(open_file, true);
3213 io_parms.tcon = tcon;
3214 io_parms.offset = *offset;
3215 io_parms.length = current_read_size;
3216 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3217 &bytes_read, &cur_offset,
3219 } while (rc == -EAGAIN);
/* stop on error or EOF (zero bytes returned) */
3221 if (rc || (bytes_read == 0)) {
3229 cifs_stats_bytes_read(tcon, total_read);
3230 *offset += bytes_read;
3238 * If the page is mmap'ed into a process' page tables, then we need to make
3239 * sure that it doesn't change while being written back.
3242 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3244 struct page *page = vmf->page;
/* page stays locked so writeback sees a stable copy */
3247 return VM_FAULT_LOCKED;
/* VM operations shared by cifs_file_mmap and cifs_file_strict_mmap */
3250 static const struct vm_operations_struct cifs_file_vm_ops = {
3251 .fault = filemap_fault,
3252 .map_pages = filemap_map_pages,
3253 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap for strict cache mode: without a read
 * oplock the page cache is zapped first so mapped reads go to the
 * server; then the generic mmap is installed with our vm ops.
 */
3256 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3259 struct inode *inode = file_inode(file);
3263 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3264 rc = cifs_zap_mapping(inode);
3269 rc = generic_file_mmap(file, vma);
3271 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - default mmap: revalidate the file first, then
 * fall through to the generic mmap with our vm ops.
 */
3276 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3281 rc = cifs_revalidate_file(file);
3283 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3288 rc = generic_file_mmap(file, vma);
3290 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readv_complete - work callback for readpages replies: for each
 * page, mark it uptodate (and push to fscache) when data arrived,
 * add it to the LRU, and drop the per-page and rdata references.
 */
3296 cifs_readv_complete(struct work_struct *work)
3298 unsigned int i, got_bytes;
3299 struct cifs_readdata *rdata = container_of(work,
3300 struct cifs_readdata, work);
3302 got_bytes = rdata->got_bytes;
3303 for (i = 0; i < rdata->nr_pages; i++) {
3304 struct page *page = rdata->pages[i];
3306 lru_cache_add_file(page);
/* -EAGAIN with partial data still made these pages valid */
3308 if (rdata->result == 0 ||
3309 (rdata->result == -EAGAIN && got_bytes)) {
3310 flush_dcache_page(page);
3311 SetPageUptodate(page);
3316 if (rdata->result == 0 ||
3317 (rdata->result == -EAGAIN && got_bytes))
3318 cifs_readpage_to_fscache(rdata->mapping->host, page);
3320 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3323 rdata->pages[i] = NULL;
3325 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * cifs_readpages_read_into_pages - receive @len bytes from the socket
 * into the rdata pages for the readpages path. Pages past the
 * server's EOF are zero-filled and marked uptodate rather than read;
 * the partial tail page is zero padded and its size recorded in
 * rdata->tailsz.
 */
3329 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3330 struct cifs_readdata *rdata, unsigned int len)
3336 unsigned int nr_pages = rdata->nr_pages;
3338 /* determine the eof that the server (probably) has */
3339 eof = CIFS_I(rdata->mapping->host)->server_eof;
3340 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3341 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3343 rdata->got_bytes = 0;
3344 rdata->tailsz = PAGE_SIZE;
3345 for (i = 0; i < nr_pages; i++) {
3346 struct page *page = rdata->pages[i];
3347 size_t n = PAGE_SIZE;
3349 if (len >= PAGE_SIZE) {
3351 } else if (len > 0) {
3352 /* enough for partial page, fill and zero the rest */
3353 zero_user(page, len, PAGE_SIZE - len);
3354 n = rdata->tailsz = len;
3356 } else if (page->index > eof_index) {
3358 * The VFS will not try to do readahead past the
3359 * i_size, but it's possible that we have outstanding
3360 * writes with gaps in the middle and the i_size hasn't
3361 * caught up yet. Populate those with zeroed out pages
3362 * to prevent the VFS from repeatedly attempting to
3363 * fill them until the writes are flushed.
3365 zero_user(page, 0, PAGE_SIZE);
3366 lru_cache_add_file(page);
3367 flush_dcache_page(page);
3368 SetPageUptodate(page);
3371 rdata->pages[i] = NULL;
3375 /* no need to hold page hostage */
3376 lru_cache_add_file(page);
3379 rdata->pages[i] = NULL;
3384 result = cifs_read_page_from_socket(server, page, n);
3388 rdata->got_bytes += result;
3391 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3392 rdata->got_bytes : result;
/*
 * readpages_get_pages - pull a run of consecutive-index pages off the
 * tail of @page_list (at most @rsize bytes' worth), lock them, insert
 * them into the page cache and move them to @tmplist, reporting the
 * run's starting @offset, page count and byte count to the caller.
 */
3396 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3397 unsigned int rsize, struct list_head *tmplist,
3398 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3400 struct page *page, *tpage;
3401 unsigned int expected_index;
3403 gfp_t gfp = readahead_gfp_mask(mapping);
3405 INIT_LIST_HEAD(tmplist);
/* page_list is in declining index order; start from its tail */
3407 page = list_entry(page_list->prev, struct page, lru);
3410 * Lock the page and put it in the cache. Since no one else
3411 * should have access to this page, we're safe to simply set
3412 * PG_locked without checking it first.
3414 __SetPageLocked(page);
3415 rc = add_to_page_cache_locked(page, mapping,
3418 /* give up if we can't stick it in the cache */
3420 __ClearPageLocked(page);
3424 /* move first page to the tmplist */
3425 *offset = (loff_t)page->index << PAGE_SHIFT;
3428 list_move_tail(&page->lru, tmplist);
3430 /* now try and add more pages onto the request */
3431 expected_index = page->index + 1;
3432 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3433 /* discontinuity ? */
3434 if (page->index != expected_index)
3437 /* would this page push the read over the rsize? */
3438 if (*bytes + PAGE_SIZE > rsize)
3441 __SetPageLocked(page);
3442 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3443 __ClearPageLocked(page);
3446 list_move_tail(&page->lru, tmplist);
3447 (*bytes) += PAGE_SIZE;
/*
 * cifs_readpages - address_space readahead: first try fscache, then
 * repeatedly carve consecutive-index runs of pages off @page_list
 * (bounded by rsize and credits) and issue an async read per run.
 * On any failure path the pages are returned to the LRU and the
 * credits given back; leftover fscache-marked pages are uncached.
 */
3454 static int cifs_readpages(struct file *file, struct address_space *mapping,
3455 struct list_head *page_list, unsigned num_pages)
3458 struct list_head tmplist;
3459 struct cifsFileInfo *open_file = file->private_data;
3460 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3461 struct TCP_Server_Info *server;
3465 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3466 * immediately if the cookie is negative
3468 * After this point, every page in the list might have PG_fscache set,
3469 * so we will need to clean that up off of every page we don't use.
3471 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3476 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3477 pid = open_file->pid;
3479 pid = current->tgid;
3482 server = tlink_tcon(open_file->tlink)->ses->server;
3484 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3485 __func__, file, mapping, num_pages);
3488 * Start with the page at end of list and move it to private
3489 * list. Do the same with any following pages until we hit
3490 * the rsize limit, hit an index discontinuity, or run out of
3491 * pages. Issue the async read and then start the loop again
3492 * until the list is empty.
3494 * Note that list order is important. The page_list is in
3495 * the order of declining indexes. When we put the pages in
3496 * the rdata->pages, then we want them in increasing order.
3498 while (!list_empty(page_list)) {
3499 unsigned int i, nr_pages, bytes, rsize;
3501 struct page *page, *tpage;
3502 struct cifs_readdata *rdata;
3505 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3511 * Give up immediately if rsize is too small to read an entire
3512 * page. The VFS will fall back to readpage. We should never
3513 * reach this point however since we set ra_pages to 0 when the
3514 * rsize is smaller than a cache page.
3516 if (unlikely(rsize < PAGE_SIZE)) {
3517 add_credits_and_wake_if(server, credits, 0);
3521 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3522 &nr_pages, &offset, &bytes);
3524 add_credits_and_wake_if(server, credits, 0);
3528 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3530 /* best to give up if we're out of mem */
3531 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3532 list_del(&page->lru);
3533 lru_cache_add_file(page);
3538 add_credits_and_wake_if(server, credits, 0);
3542 rdata->cfile = cifsFileInfo_get(open_file);
3543 rdata->mapping = mapping;
3544 rdata->offset = offset;
3545 rdata->bytes = bytes;
3547 rdata->pagesz = PAGE_SIZE;
3548 rdata->read_into_pages = cifs_readpages_read_into_pages;
3549 rdata->credits = credits;
3551 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3552 list_del(&page->lru);
3553 rdata->pages[rdata->nr_pages++] = page;
/* reopen a stale handle before issuing the async read */
3556 if (!rdata->cfile->invalidHandle ||
3557 !cifs_reopen_file(rdata->cfile, true))
3558 rc = server->ops->async_readv(rdata);
3560 add_credits_and_wake_if(server, rdata->credits, 0);
3561 for (i = 0; i < rdata->nr_pages; i++) {
3562 page = rdata->pages[i];
3563 lru_cache_add_file(page);
3567 /* Fallback to the readpage in error/reconnect cases */
3568 kref_put(&rdata->refcount, cifs_readdata_release);
3572 kref_put(&rdata->refcount, cifs_readdata_release);
3575 /* Any pages that have been shown to fscache but didn't get added to
3576 * the pagecache must be uncached before they get returned to the
3579 cifs_fscache_readpages_cancel(mapping->host, page_list);
3584 * cifs_readpage_worker must be called with the page pinned
3586 static int cifs_readpage_worker(struct file *file, struct page *page,
/* Try fscache first; otherwise do a sync read, zero the tail,
 * mark the page uptodate and push it back into fscache. */
3592 /* Is the page cached? */
3593 rc = cifs_readpage_from_fscache(file_inode(file), page);
3597 read_data = kmap(page);
3598 /* for reads over a certain size could initiate async read ahead */
3600 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3605 cifs_dbg(FYI, "Bytes read %d\n", rc);
3607 file_inode(file)->i_atime =
3608 current_time(file_inode(file));
/* short read: zero the remainder of the page */
3611 memset(read_data + rc, 0, PAGE_SIZE - rc);
3613 flush_dcache_page(page);
3614 SetPageUptodate(page);
3616 /* send this page to the cache */
3617 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * cifs_readpage - address_space .readpage: compute the page's file
 * offset and delegate to cifs_readpage_worker().
 */
3629 static int cifs_readpage(struct file *file, struct page *page)
3631 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3637 if (file->private_data == NULL) {
3643 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3644 page, (int)offset, (int)offset);
3646 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - scan the inode's open-file list (under the
 * tcon's open_file_lock) for any handle opened with FMODE_WRITE.
 */
3652 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3654 struct cifsFileInfo *open_file;
3655 struct cifs_tcon *tcon =
3656 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3658 spin_lock(&tcon->open_file_lock);
3659 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3660 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3661 spin_unlock(&tcon->open_file_lock);
3665 spin_unlock(&tcon->open_file_lock);
3669 /* We do not want to update the file size from server for inodes
3670 open for write - to avoid races with writepage extending
3671 the file - in the future we could consider allowing
3672 refreshing the inode only on increases in the file size
3673 but this is tricky to do without racing with writebehind
3674 page caching in the current Linux kernel design */
3675 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3680 if (is_inode_writable(cifsInode)) {
3681 /* This inode is open for write at least once */
3682 struct cifs_sb_info *cifs_sb;
3684 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3685 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3686 /* since no page cache to corrupt on directio
3687 we can change size safely */
/* shrinking below cached i_size while writable is unsafe */
3691 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space .write_begin: grab/lock the target
 * page and decide whether it must be read from the server first.
 * The read is skipped for a full-page write, for oplocked pages that
 * lie beyond / straddle EOF (zeroed instead), or when a previous
 * attempt already went through (oncethru).
 */
3699 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3700 loff_t pos, unsigned len, unsigned flags,
3701 struct page **pagep, void **fsdata)
3704 pgoff_t index = pos >> PAGE_SHIFT;
3705 loff_t offset = pos & (PAGE_SIZE - 1);
3706 loff_t page_start = pos & PAGE_MASK;
3711 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3714 page = grab_cache_page_write_begin(mapping, index, flags);
3720 if (PageUptodate(page))
3724 * If we write a full page it will be up to date, no need to read from
3725 * the server. If the write is short, we'll end up doing a sync write
3728 if (len == PAGE_SIZE)
3732 * optimize away the read when we have an oplock, and we're not
3733 * expecting to use any of the data we'd be reading in. That
3734 * is, when the page lies beyond the EOF, or straddles the EOF
3735 * and the write will cover all of the existing data.
3737 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3738 i_size = i_size_read(mapping->host);
3739 if (page_start >= i_size ||
3740 (offset == 0 && (pos + len) >= i_size)) {
3741 zero_user_segments(page, 0, offset,
3745 * PageChecked means that the parts of the page
3746 * to which we're not writing are considered up
3747 * to date. Once the data is copied to the
3748 * page, it can be set uptodate.
3750 SetPageChecked(page);
3755 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3757 * might as well read a page, it is fast enough. If we get
3758 * an error, we don't need to return it. cifs_write_end will
3759 * do a sync write instead since PG_uptodate isn't set.
3761 cifs_readpage_worker(file, page, &page_start);
3766 /* we could try using another file handle if there is one -
3767 but how would we lock it to prevent close of that handle
3768 racing with this read? In any case
3769 this will be written out by write_end so is fine */
/*
 * cifs_release_page - refuse release while the page has private
 * data; otherwise let fscache decide.
 */
3776 static int cifs_release_page(struct page *page, gfp_t gfp)
3778 if (PagePrivate(page))
3781 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - on a full-page invalidation, drop the page
 * from fscache as well.
 */
3784 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3785 unsigned int length)
3787 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3789 if (offset == 0 && length == PAGE_SIZE)
3790 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - write a single dirty page back synchronously
 * (WB_SYNC_ALL over just this page's range), then invalidate its
 * fscache copy.
 */
3793 static int cifs_launder_page(struct page *page)
3796 loff_t range_start = page_offset(page);
3797 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3798 struct writeback_control wbc = {
3799 .sync_mode = WB_SYNC_ALL,
3801 .range_start = range_start,
3802 .range_end = range_end,
3805 cifs_dbg(FYI, "Launder page: %p\n", page);
3807 if (clear_page_dirty_for_io(page))
3808 rc = cifs_writepage_locked(page, &wbc);
3810 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - work handler for a server oplock break: wait
 * for pending writers, downgrade the cached oplock state, break any
 * kernel lease, flush (and possibly invalidate) the page cache,
 * re-push byte-range locks, and acknowledge the break to the server
 * unless it was cancelled (e.g. by reconnect).
 */
3814 void cifs_oplock_break(struct work_struct *work)
3816 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3818 struct inode *inode = d_inode(cfile->dentry);
3819 struct cifsInodeInfo *cinode = CIFS_I(inode);
3820 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3821 struct TCP_Server_Info *server = tcon->ses->server;
3824 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3825 TASK_UNINTERRUPTIBLE);
3827 server->ops->downgrade_oplock(server, cinode,
3828 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
/* mandatory locks are incompatible with keeping a read cache */
3830 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3831 cifs_has_mand_locks(cinode)) {
3832 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3837 if (inode && S_ISREG(inode->i_mode)) {
3838 if (CIFS_CACHE_READ(cinode))
3839 break_lease(inode, O_RDONLY);
3841 break_lease(inode, O_WRONLY);
3842 rc = filemap_fdatawrite(inode->i_mapping);
3843 if (!CIFS_CACHE_READ(cinode)) {
3844 rc = filemap_fdatawait(inode->i_mapping);
3845 mapping_set_error(inode->i_mapping, rc);
3846 cifs_zap_mapping(inode);
3848 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3851 rc = cifs_push_locks(cfile);
3853 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3856 * releasing stale oplock after recent reconnect of smb session using
3857 * a now incorrect file handle is not a data integrity issue but do
3858 * not bother sending an oplock release if session to server still is
3859 * disconnected since oplock already released by the server
3861 if (!cfile->oplock_break_cancelled) {
3862 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3864 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3866 cifs_done_oplock_break(cinode);
3870 * The presence of cifs_direct_io() in the address space ops vector
3871 * allows open() O_DIRECT flags which would have failed otherwise.
3873 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3874 * so this method should never be called.
3876 * Direct IO is not yet supported in the cached mode.
3879 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3883 * Eventually need to support direct IO for non forcedirectio mounts
/* Address-space operations used when the server buffer is large
 * enough for readpages; includes direct_IO so O_DIRECT opens work. */
3889 const struct address_space_operations cifs_addr_ops = {
3890 .readpage = cifs_readpage,
3891 .readpages = cifs_readpages,
3892 .writepage = cifs_writepage,
3893 .writepages = cifs_writepages,
3894 .write_begin = cifs_write_begin,
3895 .write_end = cifs_write_end,
3896 .set_page_dirty = __set_page_dirty_nobuffers,
3897 .releasepage = cifs_release_page,
3898 .direct_IO = cifs_direct_io,
3899 .invalidatepage = cifs_invalidate_page,
3900 .launder_page = cifs_launder_page,
3904 * cifs_readpages requires the server to support a buffer large enough to
3905 * contain the header plus one complete page of data. Otherwise, we need
3906 * to leave cifs_readpages out of the address space operations.
/* Same table as cifs_addr_ops but without .readpages (small-buffer
 * servers) and without .direct_IO. */
3908 const struct address_space_operations cifs_addr_ops_smallbuf = {
3909 .readpage = cifs_readpage,
3910 .writepage = cifs_writepage,
3911 .writepages = cifs_writepages,
3912 .write_begin = cifs_write_begin,
3913 .write_end = cifs_write_end,
3914 .set_page_dirty = __set_page_dirty_nobuffers,
3915 .releasepage = cifs_release_page,
3916 .invalidatepage = cifs_invalidate_page,
3917 .launder_page = cifs_launder_page,