/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
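/*
 * Illustrative note (editor's sketch, not driver code): an O_RDWR open
 * requests
 *
 *	cifs_convert_flags(O_RDWR) == (GENERIC_READ | GENERIC_WRITE)
 *
 * rather than GENERIC_ALL, so servers with restrictive ACLs do not fail
 * the create with an access-denied error.
 */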

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
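/*
 * Worked example (sketch only; SMB_O_* are the wire constants from
 * cifspdu.h): a create-for-write open such as
 *
 *	open(path, O_WRONLY | O_CREAT | O_EXCL)
 *
 * converts to SMB_O_WRONLY | SMB_O_CREAT | SMB_O_EXCL on the wire.
 */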

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
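/*
 * Worked example (sketch only), matching the mapping table documented in
 * cifs_nt_open() below:
 *
 *	cifs_get_disposition(O_CREAT | O_EXCL)  == FILE_CREATE
 *	cifs_get_disposition(O_CREAT | O_TRUNC) == FILE_OVERWRITE_IF
 *	cifs_get_disposition(O_RDWR)            == FILE_OPEN
 */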

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new file (with the attributes /
 *      metadata passed in on the open call) as FILE_SUPERSEDE does.
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}
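/*
 * Usage sketch (mirrors the caller below): cifs_new_fileinfo() downgrades
 * a freshly granted read oplock to None when mandatory byte-range locks
 * already exist on the inode, since client-side read caching is unsafe
 * while brlocks are outstanding:
 *
 *	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode))
 *		oplock = 0;
 */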

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}
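/*
 * Reference-lifecycle sketch (assumed typical usage, not a verbatim call
 * site): every cifsFileInfo_get() must be balanced by a cifsFileInfo_put(),
 * which closes the server handle on the final put:
 *
 *	struct cifsFileInfo *cfile = cifsFileInfo_get(file->private_data);
 *	// ... use cfile without fear of it being freed underneath us ...
 *	cifsFileInfo_put(cfile);
 */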

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because stale pages may cause an error when
                 * we open this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * I/O or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we cannot set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * We cannot grab the rename sem here, because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here; we
         * can never tell whether the caller already holds the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors; in
                 * the reconnect path especially, it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * We cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush it; and since we do not know whether
         * we have data that would invalidate the current end of file on the
         * server, we cannot go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}
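/*
 * Allocation sketch (hypothetical caller, mirroring how cifs_setlk() later
 * in this file builds a lock record before trying to add it):
 *
 *	struct cifsLockInfo *lock;
 *
 *	lock = cifs_lock_init(flock->fl_start, length, type);
 *	if (!lock)
 *		return -ENOMEM;
 */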

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}
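/*
 * Semantics sketch of the conflict rules above (descriptive only): two
 * shared (read) locks on an overlapping range never conflict, and a lock
 * held by the same tgid through the same fid is normally skipped for
 * read/write checks; the exception is CIFS_WRITE_OP, where even the
 * owner's own shared lock blocks the write through that fid.
 */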

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
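/*
 * Sizing note (symbolic worked example; actual byte counts depend on the
 * negotiated buffer): at most
 *
 *	max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE)
 *
 * ranges fit into a single LOCKING_ANDX request, which is why the loop
 * above flushes the accumulated array to the server each time num reaches
 * max_num.
 */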

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
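/*
 * Rationale sketch: POSIX lock ownership is a pointer-sized fl_owner_t,
 * but the SMB lock request carries a small numeric "pid" as the owner.
 * Hashing the pointer with the boot-time cifs_lock_secret yields a stable
 * wire identifier, used as in cifs_push_posix_locks() below:
 *
 *	lck->pid = hash_lockowner(flock->fl_owner);
 */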

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we hold cinode->lock_sem, which protects
         * the locking operations on this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated structure */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}
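/*
 * Translation example (descriptive sketch): a blocking F_WRLCK request
 * arrives with FL_SLEEP set, so the function above produces
 *
 *	*type = large_lock_type | exclusive_lock_type;
 *	*lock = 1;
 *	*wait_flag = true;
 *
 * i.e. a blocking exclusive byte-range lock to send to the server.
 */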

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}
1379
1380 int
1381 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1382                   unsigned int xid)
1383 {
1384         int rc = 0, stored_rc;
1385         int types[] = {LOCKING_ANDX_LARGE_FILES,
1386                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1387         unsigned int i;
1388         unsigned int max_num, num, max_buf;
1389         LOCKING_ANDX_RANGE *buf, *cur;
1390         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1391         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1392         struct cifsLockInfo *li, *tmp;
1393         __u64 length = 1 + flock->fl_end - flock->fl_start;
1394         struct list_head tmp_llist;
1395
1396         INIT_LIST_HEAD(&tmp_llist);
1397
1398         /*
1399          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1400          * and check it for zero before using.
1401          */
1402         max_buf = tcon->ses->server->maxBuf;
1403         if (!max_buf)
1404                 return -EINVAL;
1405
1406         max_num = (max_buf - sizeof(struct smb_hdr)) /
1407                                                 sizeof(LOCKING_ANDX_RANGE);
1408         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1409         if (!buf)
1410                 return -ENOMEM;
1411
1412         down_write(&cinode->lock_sem);
1413         for (i = 0; i < 2; i++) {
1414                 cur = buf;
1415                 num = 0;
1416                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1417                         if (flock->fl_start > li->offset ||
1418                             (flock->fl_start + length) <
1419                             (li->offset + li->length))
1420                                 continue;
1421                         if (current->tgid != li->pid)
1422                                 continue;
1423                         if (types[i] != li->type)
1424                                 continue;
1425                         if (cinode->can_cache_brlcks) {
1426                                 /*
1427                                  * We can cache brlock requests - simply remove
1428                                  * a lock from the file's list.
1429                                  */
1430                                 list_del(&li->llist);
1431                                 cifs_del_lock_waiters(li);
1432                                 kfree(li);
1433                                 continue;
1434                         }
1435                         cur->Pid = cpu_to_le16(li->pid);
1436                         cur->LengthLow = cpu_to_le32((u32)li->length);
1437                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1438                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1439                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1440                         /*
1441                          * We need to save the lock here so we can add it back to
1442                          * the file's list if the unlock range request fails on
1443                          * the server.
1444                          */
1445                         list_move(&li->llist, &tmp_llist);
1446                         if (++num == max_num) {
1447                                 stored_rc = cifs_lockv(xid, tcon,
1448                                                        cfile->fid.netfid,
1449                                                        li->type, num, 0, buf);
1450                                 if (stored_rc) {
1451                                         /*
1452                                          * We failed on the unlock range
1453                                          * request - add all locks from the tmp
1454                                          * list to the head of the file's list.
1455                                          */
1456                                         cifs_move_llist(&tmp_llist,
1457                                                         &cfile->llist->locks);
1458                                         rc = stored_rc;
1459                                 } else
1460                                         /*
1461                                          * The unlock range request succeeded -
1462                                          * free the tmp list.
1463                                          */
1464                                         cifs_free_llist(&tmp_llist);
1465                                 cur = buf;
1466                                 num = 0;
1467                         } else
1468                                 cur++;
1469                 }
1470                 if (num) {
1471                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1472                                                types[i], num, 0, buf);
1473                         if (stored_rc) {
1474                                 cifs_move_llist(&tmp_llist,
1475                                                 &cfile->llist->locks);
1476                                 rc = stored_rc;
1477                         } else
1478                                 cifs_free_llist(&tmp_llist);
1479                 }
1480         }
1481
1482         up_write(&cinode->lock_sem);
1483         kfree(buf);
1484         return rc;
1485 }
1486
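/*
 * Set or clear a byte-range lock. On POSIX-capable mounts the request is
 * sent via CIFSSMBPosixLock(); otherwise a mandatory lock is recorded
 * locally and pushed to the server through the mand_lock and
 * mand_unlock_range server operations.
 */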
1487 static int
1488 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1489            bool wait_flag, bool posix_lck, int lock, int unlock,
1490            unsigned int xid)
1491 {
1492         int rc = 0;
1493         __u64 length = 1 + flock->fl_end - flock->fl_start;
1494         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1495         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1496         struct TCP_Server_Info *server = tcon->ses->server;
1497         struct inode *inode = d_inode(cfile->dentry);
1498
1499         if (posix_lck) {
1500                 int posix_lock_type;
1501
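                /*
                 * A zero or negative return from cifs_posix_lock_set() is
                 * final (the lock was handled locally or failed); any other
                 * value means the request must be sent to the server below.
                 */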
1502                 rc = cifs_posix_lock_set(file, flock);
1503                 if (rc <= 0)
1504                         return rc;
1505
1506                 if (type & server->vals->shared_lock_type)
1507                         posix_lock_type = CIFS_RDLCK;
1508                 else
1509                         posix_lock_type = CIFS_WRLCK;
1510
1511                 if (unlock == 1)
1512                         posix_lock_type = CIFS_UNLCK;
1513
1514                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1515                                       hash_lockowner(flock->fl_owner),
1516                                       flock->fl_start, length,
1517                                       NULL, posix_lock_type, wait_flag);
1518                 goto out;
1519         }
1520
1521         if (lock) {
1522                 struct cifsLockInfo *lock;
1523
1524                 lock = cifs_lock_init(flock->fl_start, length, type);
1525                 if (!lock)
1526                         return -ENOMEM;
1527
1528                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1529                 if (rc < 0) {
1530                         kfree(lock);
1531                         return rc;
1532                 }
1533                 if (!rc)
1534                         goto out;
1535
1536                 /*
1537                  * A Windows 7 server can delay breaking a lease from read to None
1538                  * if we set a byte-range lock on a file - break it explicitly
1539                  * before sending the lock to the server to be sure the next
1540                  * read won't conflict with non-overlapped locks due to
1541                  * page reading.
1542                  */
1543                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1544                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1545                         cifs_zap_mapping(inode);
1546                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1547                                  inode);
1548                         CIFS_I(inode)->oplock = 0;
1549                 }
1550
1551                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1552                                             type, 1, 0, wait_flag);
1553                 if (rc) {
1554                         kfree(lock);
1555                         return rc;
1556                 }
1557
1558                 cifs_lock_add(cfile, lock);
1559         } else if (unlock)
1560                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1561
1562 out:
1563         if (flock->fl_flags & FL_POSIX && !rc)
1564                 rc = locks_lock_file_wait(file, flock);
1565         return rc;
1566 }
1567
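/*
 * ->lock entry point in the CIFS file_operations. Decodes the flock
 * request, answers F_GETLK queries via cifs_getlk(), and hands lock and
 * unlock requests to cifs_setlk().
 */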
1568 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1569 {
1570         int rc, xid;
1571         int lock = 0, unlock = 0;
1572         bool wait_flag = false;
1573         bool posix_lck = false;
1574         struct cifs_sb_info *cifs_sb;
1575         struct cifs_tcon *tcon;
1576         struct cifsInodeInfo *cinode;
1577         struct cifsFileInfo *cfile;
1578         __u16 netfid;
1579         __u32 type;
1580
1581         rc = -EACCES;
1582         xid = get_xid();
1583
1584         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1585                  cmd, flock->fl_flags, flock->fl_type,
1586                  flock->fl_start, flock->fl_end);
1587
1588         cfile = (struct cifsFileInfo *)file->private_data;
1589         tcon = tlink_tcon(cfile->tlink);
1590
1591         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1592                         tcon->ses->server);
1593
1594         cifs_sb = CIFS_FILE_SB(file);
1595         netfid = cfile->fid.netfid;
1596         cinode = CIFS_I(file_inode(file));
1597
1598         if (cap_unix(tcon->ses) &&
1599             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1600             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1601                 posix_lck = true;
1602         /*
1603          * BB add code here to normalize offset and length to account for
1604          * negative length, which we cannot accept over the wire.
1605          */
1606         if (IS_GETLK(cmd)) {
1607                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1608                 free_xid(xid);
1609                 return rc;
1610         }
1611
1612         if (!lock && !unlock) {
1613                 /*
1614                  * if this is neither a lock nor an unlock request, there is
1615                  * nothing to do since we do not know what it is
1616                  */
1617                 free_xid(xid);
1618                 return -EOPNOTSUPP;
1619         }
1620
1621         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1622                         xid);
1623         free_xid(xid);
1624         return rc;
1625 }
1626
1627 /*
1628  * Update the file size (if needed) after a write. Should be called with
1629  * the inode->i_lock held.
1630  */
1631 void
1632 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1633                       unsigned int bytes_written)
1634 {
1635         loff_t end_of_write = offset + bytes_written;
1636
1637         if (end_of_write > cifsi->server_eof)
1638                 cifsi->server_eof = end_of_write;
1639 }
1640
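/*
 * Synchronous write helper: loop until @write_size bytes have been sent,
 * retrying on -EAGAIN, reopening an invalidated handle when necessary, and
 * advancing the cached server EOF and in-core i_size as the data goes out.
 */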
1641 static ssize_t
1642 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1643            size_t write_size, loff_t *offset)
1644 {
1645         int rc = 0;
1646         unsigned int bytes_written = 0;
1647         unsigned int total_written;
1648         struct cifs_sb_info *cifs_sb;
1649         struct cifs_tcon *tcon;
1650         struct TCP_Server_Info *server;
1651         unsigned int xid;
1652         struct dentry *dentry = open_file->dentry;
1653         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1654         struct cifs_io_parms io_parms;
1655
1656         cifs_sb = CIFS_SB(dentry->d_sb);
1657
1658         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1659                  write_size, *offset, dentry);
1660
1661         tcon = tlink_tcon(open_file->tlink);
1662         server = tcon->ses->server;
1663
1664         if (!server->ops->sync_write)
1665                 return -ENOSYS;
1666
1667         xid = get_xid();
1668
1669         for (total_written = 0; write_size > total_written;
1670              total_written += bytes_written) {
1671                 rc = -EAGAIN;
1672                 while (rc == -EAGAIN) {
1673                         struct kvec iov[2];
1674                         unsigned int len;
1675
1676                         if (open_file->invalidHandle) {
1677                                 /* we could deadlock if we called
1678                                    filemap_fdatawait from here, so tell
1679                                    reopen_file not to flush data to the
1680                                    server now */
1681                                 rc = cifs_reopen_file(open_file, false);
1682                                 if (rc != 0)
1683                                         break;
1684                         }
1685
1686                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1687                                   (unsigned int)write_size - total_written);
1688                         /* iov[0] is reserved for smb header */
1689                         iov[1].iov_base = (char *)write_data + total_written;
1690                         iov[1].iov_len = len;
1691                         io_parms.pid = pid;
1692                         io_parms.tcon = tcon;
1693                         io_parms.offset = *offset;
1694                         io_parms.length = len;
1695                         rc = server->ops->sync_write(xid, &open_file->fid,
1696                                         &io_parms, &bytes_written, iov, 1);
1697                 }
1698                 if (rc || (bytes_written == 0)) {
1699                         if (total_written)
1700                                 break;
1701                         else {
1702                                 free_xid(xid);
1703                                 return rc;
1704                         }
1705                 } else {
1706                         spin_lock(&d_inode(dentry)->i_lock);
1707                         cifs_update_eof(cifsi, *offset, bytes_written);
1708                         spin_unlock(&d_inode(dentry)->i_lock);
1709                         *offset += bytes_written;
1710                 }
1711         }
1712
1713         cifs_stats_bytes_written(tcon, total_written);
1714
1715         if (total_written > 0) {
1716                 spin_lock(&d_inode(dentry)->i_lock);
1717                 if (*offset > d_inode(dentry)->i_size)
1718                         i_size_write(d_inode(dentry), *offset);
1719                 spin_unlock(&d_inode(dentry)->i_lock);
1720         }
1721         mark_inode_dirty_sync(d_inode(dentry));
1722         free_xid(xid);
1723         return total_written;
1724 }
1725
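/*
 * Find an open handle on this inode that has read access, taking an extra
 * reference on it so it cannot be closed underneath the caller. Returns
 * NULL if no usable handle exists.
 */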
1726 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1727                                         bool fsuid_only)
1728 {
1729         struct cifsFileInfo *open_file = NULL;
1730         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1731
1732         /* only filter by fsuid on multiuser mounts */
1733         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1734                 fsuid_only = false;
1735
1736         spin_lock(&cifs_file_list_lock);
1737         /* we could simply take the first list entry since write-only entries
1738            are always at the end of the list, but since the first entry might
1739            have a close pending, we walk the whole list */
1740         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1741                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1742                         continue;
1743                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1744                         if (!open_file->invalidHandle) {
1745                                 /* found a good file */
1746                                 /* lock it so it will not be closed on us */
1747                                 cifsFileInfo_get_locked(open_file);
1748                                 spin_unlock(&cifs_file_list_lock);
1749                                 return open_file;
1750                         } /* else might as well continue, and look for
1751                              another, or simply have the caller reopen it
1752                              again rather than trying to fix this handle */
1753                 } else /* write only file */
1754                         break; /* write only files are last so must be done */
1755         }
1756         spin_unlock(&cifs_file_list_lock);
1757         return NULL;
1758 }
1759
1760 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1761                                         bool fsuid_only)
1762 {
1763         struct cifsFileInfo *open_file, *inv_file = NULL;
1764         struct cifs_sb_info *cifs_sb;
1765         bool any_available = false;
1766         int rc;
1767         unsigned int refind = 0;
1768
1769         /* Having a null inode here (because mapping->host was set to zero by
1770         the VFS or MM) should not happen, but we had reports of an oops (due to
1771         it being zero) during stress test cases, so we need to check for it */
1772
1773         if (cifs_inode == NULL) {
1774                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1775                 dump_stack();
1776                 return NULL;
1777         }
1778
1779         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1780
1781         /* only filter by fsuid on multiuser mounts */
1782         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1783                 fsuid_only = false;
1784
1785         spin_lock(&cifs_file_list_lock);
1786 refind_writable:
1787         if (refind > MAX_REOPEN_ATT) {
1788                 spin_unlock(&cifs_file_list_lock);
1789                 return NULL;
1790         }
1791         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1792                 if (!any_available && open_file->pid != current->tgid)
1793                         continue;
1794                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1795                         continue;
1796                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1797                         if (!open_file->invalidHandle) {
1798                                 /* found a good writable file */
1799                                 cifsFileInfo_get_locked(open_file);
1800                                 spin_unlock(&cifs_file_list_lock);
1801                                 return open_file;
1802                         } else {
1803                                 if (!inv_file)
1804                                         inv_file = open_file;
1805                         }
1806                 }
1807         }
1808         /* couldn't find a usable FH with the same pid, try any available */
1809         if (!any_available) {
1810                 any_available = true;
1811                 goto refind_writable;
1812         }
1813
1814         if (inv_file) {
1815                 any_available = false;
1816                 cifsFileInfo_get_locked(inv_file);
1817         }
1818
1819         spin_unlock(&cifs_file_list_lock);
1820
1821         if (inv_file) {
1822                 rc = cifs_reopen_file(inv_file, false);
1823                 if (!rc)
1824                         return inv_file;
1825                 else {
1826                         spin_lock(&cifs_file_list_lock);
1827                         list_move_tail(&inv_file->flist,
1828                                         &cifs_inode->openFileList);
1829                         spin_unlock(&cifs_file_list_lock);
1830                         cifsFileInfo_put(inv_file);
1831                         spin_lock(&cifs_file_list_lock);
1832                         ++refind;
1833                         inv_file = NULL;
1834                         goto refind_writable;
1835                 }
1836         }
1837
1838         return NULL;
1839 }
1840
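/*
 * Write the byte range [@from, @to) of @page back to the server through
 * any writable handle on the inode, clamping the range first so that the
 * write never extends the file.
 */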
1841 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1842 {
1843         struct address_space *mapping = page->mapping;
1844         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1845         char *write_data;
1846         int rc = -EFAULT;
1847         int bytes_written = 0;
1848         struct inode *inode;
1849         struct cifsFileInfo *open_file;
1850
1851         if (!mapping || !mapping->host)
1852                 return -EFAULT;
1853
1854         inode = page->mapping->host;
1855
1856         offset += (loff_t)from;
1857         write_data = kmap(page);
1858         write_data += from;
1859
1860         if ((to > PAGE_SIZE) || (from > to)) {
1861                 kunmap(page);
1862                 return -EIO;
1863         }
1864
1865         /* racing with truncate? */
1866         if (offset > mapping->host->i_size) {
1867                 kunmap(page);
1868                 return 0; /* don't care */
1869         }
1870
1871         /* check to make sure that we are not extending the file */
1872         if (mapping->host->i_size - offset < (loff_t)to)
1873                 to = (unsigned)(mapping->host->i_size - offset);
1874
1875         open_file = find_writable_file(CIFS_I(mapping->host), false);
1876         if (open_file) {
1877                 bytes_written = cifs_write(open_file, open_file->pid,
1878                                            write_data, to - from, &offset);
1879                 cifsFileInfo_put(open_file);
1880                 /* Does mm or vfs already set times? */
1881                 inode->i_atime = inode->i_mtime = current_time(inode);
1882                 if ((bytes_written > 0) && (offset))
1883                         rc = 0;
1884                 else if (bytes_written < 0)
1885                         rc = bytes_written;
1886         } else {
1887                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1888                 rc = -EIO;
1889         }
1890
1891         kunmap(page);
1892         return rc;
1893 }
1894
1895 static struct cifs_writedata *
1896 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1897                           pgoff_t end, pgoff_t *index,
1898                           unsigned int *found_pages)
1899 {
1900         unsigned int nr_pages;
1901         struct page **pages;
1902         struct cifs_writedata *wdata;
1903
1904         wdata = cifs_writedata_alloc((unsigned int)tofind,
1905                                      cifs_writev_complete);
1906         if (!wdata)
1907                 return NULL;
1908
1909         /*
1910          * find_get_pages_tag seems to return a max of 256 on each
1911          * iteration, so we must call it several times in order to
1912          * fill the array or the wsize is effectively limited to
1913          * 256 * PAGE_SIZE.
1914          */
1915         *found_pages = 0;
1916         pages = wdata->pages;
1917         do {
1918                 nr_pages = find_get_pages_tag(mapping, index,
1919                                               PAGECACHE_TAG_DIRTY, tofind,
1920                                               pages);
1921                 *found_pages += nr_pages;
1922                 tofind -= nr_pages;
1923                 pages += nr_pages;
1924         } while (nr_pages && tofind && *index <= end);
1925
1926         return wdata;
1927 }
1928
1929 static unsigned int
1930 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1931                     struct address_space *mapping,
1932                     struct writeback_control *wbc,
1933                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1934 {
1935         unsigned int nr_pages = 0, i;
1936         struct page *page;
1937
1938         for (i = 0; i < found_pages; i++) {
1939                 page = wdata->pages[i];
1940                 /*
1941                  * At this point we hold neither mapping->tree_lock nor
1942                  * lock on the page itself: the page may be truncated or
1943                  * invalidated (changing page->mapping to NULL), or even
1944                  * swizzled back from swapper_space to tmpfs file
1945                  * mapping
1946                  */
1947
1948                 if (nr_pages == 0)
1949                         lock_page(page);
1950                 else if (!trylock_page(page))
1951                         break;
1952
1953                 if (unlikely(page->mapping != mapping)) {
1954                         unlock_page(page);
1955                         break;
1956                 }
1957
1958                 if (!wbc->range_cyclic && page->index > end) {
1959                         *done = true;
1960                         unlock_page(page);
1961                         break;
1962                 }
1963
1964                 if (*next && (page->index != *next)) {
1965                         /* Not next consecutive page */
1966                         unlock_page(page);
1967                         break;
1968                 }
1969
1970                 if (wbc->sync_mode != WB_SYNC_NONE)
1971                         wait_on_page_writeback(page);
1972
1973                 if (PageWriteback(page) ||
1974                                 !clear_page_dirty_for_io(page)) {
1975                         unlock_page(page);
1976                         break;
1977                 }
1978
1979                 /*
1980                  * This actually clears the dirty bit in the radix tree.
1981                  * See cifs_writepage() for more commentary.
1982                  */
1983                 set_page_writeback(page);
1984                 if (page_offset(page) >= i_size_read(mapping->host)) {
1985                         *done = true;
1986                         unlock_page(page);
1987                         end_page_writeback(page);
1988                         break;
1989                 }
1990
1991                 wdata->pages[i] = page;
1992                 *next = page->index + 1;
1993                 ++nr_pages;
1994         }
1995
1996         /* reset index to refind any pages skipped */
1997         if (nr_pages == 0)
1998                 *index = wdata->pages[0]->index + 1;
1999
2000         /* put any pages we aren't going to use */
2001         for (i = nr_pages; i < found_pages; i++) {
2002                 put_page(wdata->pages[i]);
2003                 wdata->pages[i] = NULL;
2004         }
2005
2006         return nr_pages;
2007 }
2008
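/*
 * Fill in the remaining writedata fields (offset, sizes, target handle)
 * for a batch of locked dirty pages, dispatch the asynchronous write, and
 * unlock the pages again whether or not the send succeeded.
 */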
2009 static int
2010 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2011                  struct address_space *mapping, struct writeback_control *wbc)
2012 {
2013         int rc = 0;
2014         struct TCP_Server_Info *server;
2015         unsigned int i;
2016
2017         wdata->sync_mode = wbc->sync_mode;
2018         wdata->nr_pages = nr_pages;
2019         wdata->offset = page_offset(wdata->pages[0]);
2020         wdata->pagesz = PAGE_SIZE;
2021         wdata->tailsz = min(i_size_read(mapping->host) -
2022                         page_offset(wdata->pages[nr_pages - 1]),
2023                         (loff_t)PAGE_SIZE);
2024         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2025
2026         if (wdata->cfile != NULL)
2027                 cifsFileInfo_put(wdata->cfile);
2028         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2029         if (!wdata->cfile) {
2030                 cifs_dbg(VFS, "No writable handles for inode\n");
2031                 rc = -EBADF;
2032         } else {
2033                 wdata->pid = wdata->cfile->pid;
2034                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2035                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2036         }
2037
2038         for (i = 0; i < nr_pages; ++i)
2039                 unlock_page(wdata->pages[i]);
2040
2041         return rc;
2042 }
2043
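/*
 * ->writepages for CIFS: repeatedly reserve send credits for up to wsize
 * bytes, gather that many contiguous dirty pages, and push each batch to
 * the server with one asynchronous write. Falls back to
 * generic_writepages() when wsize is smaller than a page.
 */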
2044 static int cifs_writepages(struct address_space *mapping,
2045                            struct writeback_control *wbc)
2046 {
2047         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2048         struct TCP_Server_Info *server;
2049         bool done = false, scanned = false, range_whole = false;
2050         pgoff_t end, index;
2051         struct cifs_writedata *wdata;
2052         int rc = 0;
2053
2054         /*
2055          * If wsize is smaller than the page cache size, default to writing
2056          * one page at a time via cifs_writepage
2057          */
2058         if (cifs_sb->wsize < PAGE_SIZE)
2059                 return generic_writepages(mapping, wbc);
2060
2061         if (wbc->range_cyclic) {
2062                 index = mapping->writeback_index; /* Start from prev offset */
2063                 end = -1;
2064         } else {
2065                 index = wbc->range_start >> PAGE_SHIFT;
2066                 end = wbc->range_end >> PAGE_SHIFT;
2067                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2068                         range_whole = true;
2069                 scanned = true;
2070         }
2071         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2072 retry:
2073         while (!done && index <= end) {
2074                 unsigned int i, nr_pages, found_pages, wsize, credits;
2075                 pgoff_t next = 0, tofind, saved_index = index;
2076
2077                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2078                                                    &wsize, &credits);
2079                 if (rc)
2080                         break;
2081
2082                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2083
2084                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2085                                                   &found_pages);
2086                 if (!wdata) {
2087                         rc = -ENOMEM;
2088                         add_credits_and_wake_if(server, credits, 0);
2089                         break;
2090                 }
2091
2092                 if (found_pages == 0) {
2093                         kref_put(&wdata->refcount, cifs_writedata_release);
2094                         add_credits_and_wake_if(server, credits, 0);
2095                         break;
2096                 }
2097
2098                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2099                                                end, &index, &next, &done);
2100
2101                 /* nothing to write? */
2102                 if (nr_pages == 0) {
2103                         kref_put(&wdata->refcount, cifs_writedata_release);
2104                         add_credits_and_wake_if(server, credits, 0);
2105                         continue;
2106                 }
2107
2108                 wdata->credits = credits;
2109
2110                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2111
2112                 /* send failure -- clean up the mess */
2113                 if (rc != 0) {
2114                         add_credits_and_wake_if(server, wdata->credits, 0);
2115                         for (i = 0; i < nr_pages; ++i) {
2116                                 if (rc == -EAGAIN)
2117                                         redirty_page_for_writepage(wbc,
2118                                                            wdata->pages[i]);
2119                                 else
2120                                         SetPageError(wdata->pages[i]);
2121                                 end_page_writeback(wdata->pages[i]);
2122                                 put_page(wdata->pages[i]);
2123                         }
2124                         if (rc != -EAGAIN)
2125                                 mapping_set_error(mapping, rc);
2126                 }
2127                 kref_put(&wdata->refcount, cifs_writedata_release);
2128
2129                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2130                         index = saved_index;
2131                         continue;
2132                 }
2133
2134                 wbc->nr_to_write -= nr_pages;
2135                 if (wbc->nr_to_write <= 0)
2136                         done = true;
2137
2138                 index = next;
2139         }
2140
2141         if (!scanned && !done) {
2142                 /*
2143                  * We hit the last page and there is more work to be done: wrap
2144                  * back to the start of the file
2145                  */
2146                 scanned = true;
2147                 index = 0;
2148                 goto retry;
2149         }
2150
2151         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2152                 mapping->writeback_index = index;
2153
2154         return rc;
2155 }
2156
2157 static int
2158 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2159 {
2160         int rc;
2161         unsigned int xid;
2162
2163         xid = get_xid();
2164 /* BB add check for wbc flags */
2165         get_page(page);
2166         if (!PageUptodate(page))
2167                 cifs_dbg(FYI, "ppw - page not up to date\n");
2168
2169         /*
2170          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2171          *
2172          * A writepage() implementation always needs to do either this,
2173          * or re-dirty the page with "redirty_page_for_writepage()" in
2174          * the case of a failure.
2175          *
2176          * Just unlocking the page will cause the radix tree tag-bits
2177          * to fail to update with the state of the page correctly.
2178          */
2179         set_page_writeback(page);
2180 retry_write:
2181         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2182         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2183                 goto retry_write;
2184         else if (rc == -EAGAIN)
2185                 redirty_page_for_writepage(wbc, page);
2186         else if (rc != 0)
2187                 SetPageError(page);
2188         else
2189                 SetPageUptodate(page);
2190         end_page_writeback(page);
2191         put_page(page);
2192         free_xid(xid);
2193         return rc;
2194 }
2195
2196 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2197 {
2198         int rc = cifs_writepage_locked(page, wbc);
2199         unlock_page(page);
2200         return rc;
2201 }
2202
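/*
 * ->write_end for CIFS: if the page never became uptodate, the copied
 * bytes are written through synchronously with cifs_write(); otherwise
 * the page is simply marked dirty and left for writeback. i_size is
 * advanced when the write extends the file.
 */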
2203 static int cifs_write_end(struct file *file, struct address_space *mapping,
2204                         loff_t pos, unsigned len, unsigned copied,
2205                         struct page *page, void *fsdata)
2206 {
2207         int rc;
2208         struct inode *inode = mapping->host;
2209         struct cifsFileInfo *cfile = file->private_data;
2210         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2211         __u32 pid;
2212
2213         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2214                 pid = cfile->pid;
2215         else
2216                 pid = current->tgid;
2217
2218         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2219                  page, pos, copied);
2220
2221         if (PageChecked(page)) {
2222                 if (copied == len)
2223                         SetPageUptodate(page);
2224                 ClearPageChecked(page);
2225         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2226                 SetPageUptodate(page);
2227
2228         if (!PageUptodate(page)) {
2229                 char *page_data;
2230                 unsigned offset = pos & (PAGE_SIZE - 1);
2231                 unsigned int xid;
2232
2233                 xid = get_xid();
2234                 /* this is probably better than directly calling
2235                    partialpage_write since in this function the file handle is
2236                    known, which we might as well leverage */
2237                 /* BB check if anything else is missing out of ppw,
2238                    such as updating the last write time */
2239                 page_data = kmap(page);
2240                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2241                 /* if (rc < 0) should we set writebehind rc? */
2242                 kunmap(page);
2243
2244                 free_xid(xid);
2245         } else {
2246                 rc = copied;
2247                 pos += copied;
2248                 set_page_dirty(page);
2249         }
2250
2251         if (rc > 0) {
2252                 spin_lock(&inode->i_lock);
2253                 if (pos > inode->i_size)
2254                         i_size_write(inode, pos);
2255                 spin_unlock(&inode->i_lock);
2256         }
2257
2258         unlock_page(page);
2259         put_page(page);
2260
2261         return rc;
2262 }
2263
2264 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2265                       int datasync)
2266 {
2267         unsigned int xid;
2268         int rc = 0;
2269         struct cifs_tcon *tcon;
2270         struct TCP_Server_Info *server;
2271         struct cifsFileInfo *smbfile = file->private_data;
2272         struct inode *inode = file_inode(file);
2273         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2274
2275         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2276         if (rc)
2277                 return rc;
2278         inode_lock(inode);
2279
2280         xid = get_xid();
2281
2282         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2283                  file, datasync);
2284
2285         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2286                 rc = cifs_zap_mapping(inode);
2287                 if (rc) {
2288                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2289                         rc = 0; /* don't care about it in fsync */
2290                 }
2291         }
2292
2293         tcon = tlink_tcon(smbfile->tlink);
2294         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2295                 server = tcon->ses->server;
2296                 if (server->ops->flush)
2297                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2298                 else
2299                         rc = -ENOSYS;
2300         }
2301
2302         free_xid(xid);
2303         inode_unlock(inode);
2304         return rc;
2305 }
2306
2307 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2308 {
2309         unsigned int xid;
2310         int rc = 0;
2311         struct cifs_tcon *tcon;
2312         struct TCP_Server_Info *server;
2313         struct cifsFileInfo *smbfile = file->private_data;
2314         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2315         struct inode *inode = file->f_mapping->host;
2316
2317         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2318         if (rc)
2319                 return rc;
2320         inode_lock(inode);
2321
2322         xid = get_xid();
2323
2324         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2325                  file, datasync);
2326
2327         tcon = tlink_tcon(smbfile->tlink);
2328         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2329                 server = tcon->ses->server;
2330                 if (server->ops->flush)
2331                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2332                 else
2333                         rc = -ENOSYS;
2334         }
2335
2336         free_xid(xid);
2337         inode_unlock(inode);
2338         return rc;
2339 }
2340
2341 /*
2342  * As the file closes, flush all cached write data for this inode, checking
2343  * for write-behind errors.
2344  */
2345 int cifs_flush(struct file *file, fl_owner_t id)
2346 {
2347         struct inode *inode = file_inode(file);
2348         int rc = 0;
2349
2350         if (file->f_mode & FMODE_WRITE)
2351                 rc = filemap_write_and_wait(inode->i_mapping);
2352
2353         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2354
2355         return rc;
2356 }
2357
2358 static int
2359 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2360 {
2361         int rc = 0;
2362         unsigned long i;
2363
2364         for (i = 0; i < num_pages; i++) {
2365                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2366                 if (!pages[i]) {
2367                         /*
2368                          * save number of pages we have already allocated and
2369                          * return with ENOMEM error
2370                          */
2371                         num_pages = i;
2372                         rc = -ENOMEM;
2373                         break;
2374                 }
2375         }
2376
2377         if (rc) {
2378                 for (i = 0; i < num_pages; i++)
2379                         put_page(pages[i]);
2380         }
2381         return rc;
2382 }
2383
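/*
 * Work out how many pages the next chunk of a @len byte write needs given
 * the server's @wsize, optionally returning the chunk length. For example
 * (assuming 4K pages), wsize=16384 and len=100000 yield *cur_len=16384 and
 * a return value of 4 pages.
 */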
2384 static inline
2385 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2386 {
2387         size_t num_pages;
2388         size_t clen;
2389
2390         clen = min_t(const size_t, len, wsize);
2391         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2392
2393         if (cur_len)
2394                 *cur_len = clen;
2395
2396         return num_pages;
2397 }
2398
2399 static void
2400 cifs_uncached_writedata_release(struct kref *refcount)
2401 {
2402         int i;
2403         struct cifs_writedata *wdata = container_of(refcount,
2404                                         struct cifs_writedata, refcount);
2405
2406         for (i = 0; i < wdata->nr_pages; i++)
2407                 put_page(wdata->pages[i]);
2408         cifs_writedata_release(refcount);
2409 }
2410
2411 static void
2412 cifs_uncached_writev_complete(struct work_struct *work)
2413 {
2414         struct cifs_writedata *wdata = container_of(work,
2415                                         struct cifs_writedata, work);
2416         struct inode *inode = d_inode(wdata->cfile->dentry);
2417         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2418
2419         spin_lock(&inode->i_lock);
2420         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2421         if (cifsi->server_eof > inode->i_size)
2422                 i_size_write(inode, cifsi->server_eof);
2423         spin_unlock(&inode->i_lock);
2424
2425         complete(&wdata->done);
2426
2427         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2428 }
2429
2430 static int
2431 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2432                       size_t *len, unsigned long *num_pages)
2433 {
2434         size_t save_len, copied, bytes, cur_len = *len;
2435         unsigned long i, nr_pages = *num_pages;
2436
2437         save_len = cur_len;
2438         for (i = 0; i < nr_pages; i++) {
2439                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2440                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2441                 cur_len -= copied;
2442                 /*
2443                  * If we didn't copy as much as we expected, then that
2444                  * may mean we trod into an unmapped area. Stop copying
2445                  * at that point. On the next pass through the big
2446                  * loop, we'll likely end up getting a zero-length
2447                  * write and bailing out of it.
2448                  */
2449                 if (copied < bytes)
2450                         break;
2451         }
2452         cur_len = save_len - cur_len;
2453         *len = cur_len;
2454
2455         /*
2456          * If we have no data to send, then that probably means that
2457          * the copy above failed altogether. That's most likely because
2458          * the address in the iovec was bogus. Return -EFAULT and let
2459          * the caller free anything we allocated and bail out.
2460          */
2461         if (!cur_len)
2462                 return -EFAULT;
2463
2464         /*
2465          * i + 1 now represents the number of pages we actually used in
2466          * the copy phase above.
2467          */
2468         *num_pages = i + 1;
2469         return 0;
2470 }
2471
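/*
 * Core of the uncached write path: for each chunk, wait for send credits,
 * allocate a writedata with fresh pages, copy the user data in from @from,
 * and dispatch an asynchronous write. On -EAGAIN the iterator is rewound
 * to the failed offset and the chunk is retried; each writedata that is
 * sent gets queued on @wdata_list for the caller to reap.
 */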
2472 static int
2473 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2474                      struct cifsFileInfo *open_file,
2475                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2476 {
2477         int rc = 0;
2478         size_t cur_len;
2479         unsigned long nr_pages, num_pages, i;
2480         struct cifs_writedata *wdata;
2481         struct iov_iter saved_from = *from;
2482         loff_t saved_offset = offset;
2483         pid_t pid;
2484         struct TCP_Server_Info *server;
2485
2486         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2487                 pid = open_file->pid;
2488         else
2489                 pid = current->tgid;
2490
2491         server = tlink_tcon(open_file->tlink)->ses->server;
2492
2493         do {
2494                 unsigned int wsize, credits;
2495
2496                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2497                                                    &wsize, &credits);
2498                 if (rc)
2499                         break;
2500
2501                 nr_pages = get_numpages(wsize, len, &cur_len);
2502                 wdata = cifs_writedata_alloc(nr_pages,
2503                                              cifs_uncached_writev_complete);
2504                 if (!wdata) {
2505                         rc = -ENOMEM;
2506                         add_credits_and_wake_if(server, credits, 0);
2507                         break;
2508                 }
2509
2510                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2511                 if (rc) {
2512                         kfree(wdata);
2513                         add_credits_and_wake_if(server, credits, 0);
2514                         break;
2515                 }
2516
2517                 num_pages = nr_pages;
2518                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2519                 if (rc) {
2520                         for (i = 0; i < nr_pages; i++)
2521                                 put_page(wdata->pages[i]);
2522                         kfree(wdata);
2523                         add_credits_and_wake_if(server, credits, 0);
2524                         break;
2525                 }
2526
2527                 /*
2528                  * Bring nr_pages down to the number of pages we actually used,
2529                  * and free any pages that we didn't use.
2530                  */
2531                 for ( ; nr_pages > num_pages; nr_pages--)
2532                         put_page(wdata->pages[nr_pages - 1]);
2533
2534                 wdata->sync_mode = WB_SYNC_ALL;
2535                 wdata->nr_pages = nr_pages;
2536                 wdata->offset = (__u64)offset;
2537                 wdata->cfile = cifsFileInfo_get(open_file);
2538                 wdata->pid = pid;
2539                 wdata->bytes = cur_len;
2540                 wdata->pagesz = PAGE_SIZE;
2541                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2542                 wdata->credits = credits;
2543
2544                 if (!wdata->cfile->invalidHandle ||
2545                     !cifs_reopen_file(wdata->cfile, false))
2546                         rc = server->ops->async_writev(wdata,
2547                                         cifs_uncached_writedata_release);
2548                 if (rc) {
2549                         add_credits_and_wake_if(server, wdata->credits, 0);
2550                         kref_put(&wdata->refcount,
2551                                  cifs_uncached_writedata_release);
2552                         if (rc == -EAGAIN) {
2553                                 *from = saved_from;
2554                                 iov_iter_advance(from, offset - saved_offset);
2555                                 continue;
2556                         }
2557                         break;
2558                 }
2559
2560                 list_add_tail(&wdata->list, wdata_list);
2561                 offset += cur_len;
2562                 len -= cur_len;
2563         } while (len > 0);
2564
2565         return rc;
2566 }
2567
2568 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2569 {
2570         struct file *file = iocb->ki_filp;
2571         ssize_t total_written = 0;
2572         struct cifsFileInfo *open_file;
2573         struct cifs_tcon *tcon;
2574         struct cifs_sb_info *cifs_sb;
2575         struct cifs_writedata *wdata, *tmp;
2576         struct list_head wdata_list;
2577         struct iov_iter saved_from = *from;
2578         int rc;
2579
2580         /*
2581          * BB - optimize the case when signing is disabled: we can drop this
2582          * extra memory-to-memory copying and use iovec buffers to construct
2583          * the write request.
2584          */
2585
2586         rc = generic_write_checks(iocb, from);
2587         if (rc <= 0)
2588                 return rc;
2589
2590         INIT_LIST_HEAD(&wdata_list);
2591         cifs_sb = CIFS_FILE_SB(file);
2592         open_file = file->private_data;
2593         tcon = tlink_tcon(open_file->tlink);
2594
2595         if (!tcon->ses->server->ops->async_writev)
2596                 return -ENOSYS;
2597
2598         rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2599                                   open_file, cifs_sb, &wdata_list);
2600
2601         /*
2602          * If at least one write was successfully sent, then discard any rc
2603          * value from the later writes. If the remaining writes succeed, then
2604          * we'll end up returning whatever was written. If one fails, then
2605          * we'll get a new rc value from that.
2606          */
2607         if (!list_empty(&wdata_list))
2608                 rc = 0;
2609
2610         /*
2611          * Wait for and collect replies for any successful sends in order of
2612          * increasing offset. Once an error is hit or we get a fatal signal
2613          * while waiting, then return without waiting for any more replies.
2614          */
2615 restart_loop:
2616         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2617                 if (!rc) {
2618                         /* FIXME: freezable too? */
2619                         rc = wait_for_completion_killable(&wdata->done);
2620                         if (rc)
2621                                 rc = -EINTR;
2622                         else if (wdata->result)
2623                                 rc = wdata->result;
2624                         else
2625                                 total_written += wdata->bytes;
2626
2627                         /* resend call if it's a retryable error */
2628                         if (rc == -EAGAIN) {
2629                                 struct list_head tmp_list;
2630                                 struct iov_iter tmp_from = saved_from;
2631
2632                                 INIT_LIST_HEAD(&tmp_list);
2633                                 list_del_init(&wdata->list);
2634
2635                                 iov_iter_advance(&tmp_from,
2636                                                  wdata->offset - iocb->ki_pos);
2637
2638                                 rc = cifs_write_from_iter(wdata->offset,
2639                                                 wdata->bytes, &tmp_from,
2640                                                 open_file, cifs_sb, &tmp_list);
2641
2642                                 list_splice(&tmp_list, &wdata_list);
2643
2644                                 kref_put(&wdata->refcount,
2645                                          cifs_uncached_writedata_release);
2646                                 goto restart_loop;
2647                         }
2648                 }
2649                 list_del_init(&wdata->list);
2650                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2651         }
2652
2653         if (unlikely(!total_written))
2654                 return rc;
2655
2656         iocb->ki_pos += total_written;
2657         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2658         cifs_stats_bytes_written(tcon, total_written);
2659         return total_written;
2660 }
2661
2662 static ssize_t
2663 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2664 {
2665         struct file *file = iocb->ki_filp;
2666         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2667         struct inode *inode = file->f_mapping->host;
2668         struct cifsInodeInfo *cinode = CIFS_I(inode);
2669         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2670         ssize_t rc;
2671
2672         /*
2673          * We need to hold the semaphore to be sure nobody modifies the lock
2674          * list with a brlock that prevents writing.
2675          */
2676         down_read(&cinode->lock_sem);
2677         inode_lock(inode);
2678
2679         rc = generic_write_checks(iocb, from);
2680         if (rc <= 0)
2681                 goto out;
2682
2683         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2684                                      server->vals->exclusive_lock_type, NULL,
2685                                      CIFS_WRITE_OP))
2686                 rc = __generic_file_write_iter(iocb, from);
2687         else
2688                 rc = -EACCES;
2689 out:
2690         inode_unlock(inode);
2691
2692         if (rc > 0)
2693                 rc = generic_write_sync(iocb, rc);
2694         up_read(&cinode->lock_sem);
2695         return rc;
2696 }
2697
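/*
 * Strict-cache write entry point. While a write-caching oplock is held,
 * writes may go through the page cache (via the generic path on
 * POSIX-capable mounts, or via cifs_writev() with byte-range lock checking
 * otherwise); without one, the data is sent straight to the server with
 * cifs_user_writev() and any read-cached pages are zapped afterwards.
 */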
2698 ssize_t
2699 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2700 {
2701         struct inode *inode = file_inode(iocb->ki_filp);
2702         struct cifsInodeInfo *cinode = CIFS_I(inode);
2703         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2704         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2705                                                 iocb->ki_filp->private_data;
2706         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2707         ssize_t written;
2708
2709         written = cifs_get_writer(cinode);
2710         if (written)
2711                 return written;
2712
2713         if (CIFS_CACHE_WRITE(cinode)) {
2714                 if (cap_unix(tcon->ses) &&
2715                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2716                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2717                         written = generic_file_write_iter(iocb, from);
2718                         goto out;
2719                 }
2720                 written = cifs_writev(iocb, from);
2721                 goto out;
2722         }
2723         /*
2724          * For non-oplocked files in strict cache mode we need to write the data
2725          * to the server exactly from pos to pos+len-1 rather than flush all
2726          * affected pages, because doing so may cause an error with mandatory
2727          * locks on these pages but not on the region from pos to pos+len-1.
2728          */
2729         written = cifs_user_writev(iocb, from);
2730         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2731                 /*
2732                  * A Windows 7 server can delay breaking a level2 oplock when a
2733                  * write request comes in - break it on the client to prevent
2734                  * reading stale data.
2735                  */
2736                 cifs_zap_mapping(inode);
2737                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2738                          inode);
2739                 cinode->oplock = 0;
2740         }
2741 out:
2742         cifs_put_writer(cinode);
2743         return written;
2744 }
2745
2746 static struct cifs_readdata *
2747 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2748 {
2749         struct cifs_readdata *rdata;
2750
2751         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2752                         GFP_KERNEL);
2753         if (rdata != NULL) {
2754                 kref_init(&rdata->refcount);
2755                 INIT_LIST_HEAD(&rdata->list);
2756                 init_completion(&rdata->done);
2757                 INIT_WORK(&rdata->work, complete);
2758         }
2759
2760         return rdata;
2761 }
2762
2763 void
2764 cifs_readdata_release(struct kref *refcount)
2765 {
2766         struct cifs_readdata *rdata = container_of(refcount,
2767                                         struct cifs_readdata, refcount);
2768
2769         if (rdata->cfile)
2770                 cifsFileInfo_put(rdata->cfile);
2771
2772         kfree(rdata);
2773 }
2774
2775 static int
2776 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2777 {
2778         int rc = 0;
2779         struct page *page;
2780         unsigned int i;
2781
2782         for (i = 0; i < nr_pages; i++) {
2783                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2784                 if (!page) {
2785                         rc = -ENOMEM;
2786                         break;
2787                 }
2788                 rdata->pages[i] = page;
2789         }
2790
2791         if (rc) {
2792                 for (i = 0; i < nr_pages; i++) {
2793                         put_page(rdata->pages[i]);
2794                         rdata->pages[i] = NULL;
2795                 }
2796         }
2797         return rc;
2798 }
2799
2800 static void
2801 cifs_uncached_readdata_release(struct kref *refcount)
2802 {
2803         struct cifs_readdata *rdata = container_of(refcount,
2804                                         struct cifs_readdata, refcount);
2805         unsigned int i;
2806
2807         for (i = 0; i < rdata->nr_pages; i++) {
2808                 put_page(rdata->pages[i]);
2809                 rdata->pages[i] = NULL;
2810         }
2811         cifs_readdata_release(refcount);
2812 }
2813
2814 /**
2815  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2816  * @rdata:      the readdata response with list of pages holding data
2817  * @iter:       destination for our data
2818  *
2819  * This function copies data from a list of pages in a readdata response into
2820  * an array of iovecs. It will first calculate where the data should go
2821  * based on the info in the readdata and then copy the data into that spot.
2822  */
2823 static int
2824 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2825 {
2826         size_t remaining = rdata->got_bytes;
2827         unsigned int i;
2828
2829         for (i = 0; i < rdata->nr_pages; i++) {
2830                 struct page *page = rdata->pages[i];
2831                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2832                 size_t written = copy_page_to_iter(page, 0, copy, iter);
2833                 remaining -= written;
2834                 if (written < copy && iov_iter_count(iter) > 0)
2835                         break;
2836         }
2837         return remaining ? -EFAULT : 0;
2838 }
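
/*
 * Editor's worked example (illustration only): with got_bytes == 6000 and
 * a 4096-byte PAGE_SIZE, the loop above copies 4096 bytes out of page 0
 * and 1904 bytes out of page 1. If copy_page_to_iter() comes up short
 * while the iterator still has room (a fault on the destination buffer),
 * the loop stops early and the nonzero "remaining" is reported as -EFAULT.
 */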
2839
2840 static void
2841 cifs_uncached_readv_complete(struct work_struct *work)
2842 {
2843         struct cifs_readdata *rdata = container_of(work,
2844                                                 struct cifs_readdata, work);
2845
2846         complete(&rdata->done);
2847         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2848 }
2849
2850 static int
2851 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2852                         struct cifs_readdata *rdata, unsigned int len)
2853 {
2854         int result = 0;
2855         unsigned int i;
2856         unsigned int nr_pages = rdata->nr_pages;
2857
2858         rdata->got_bytes = 0;
2859         rdata->tailsz = PAGE_SIZE;
2860         for (i = 0; i < nr_pages; i++) {
2861                 struct page *page = rdata->pages[i];
2862                 size_t n;
2863
2864                 if (len == 0) {
2865                         /* no need to hold page hostage */
2866                         rdata->pages[i] = NULL;
2867                         rdata->nr_pages--;
2868                         put_page(page);
2869                         continue;
2870                 }
2871                 n = len;
2872                 if (len >= PAGE_SIZE) {
2873                         /* enough data to fill the page */
2874                         n = PAGE_SIZE;
2875                         len -= n;
2876                 } else {
2877                         zero_user(page, len, PAGE_SIZE - len);
2878                         rdata->tailsz = len;
2879                         len = 0;
2880                 }
2881                 result = cifs_read_page_from_socket(server, page, n);
2882                 if (result < 0)
2883                         break;
2884
2885                 rdata->got_bytes += result;
2886         }
2887
2888         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2889                                                 rdata->got_bytes : result;
2890 }
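
/*
 * Editor's worked example (illustration only): for len == 5000 with a
 * 4096-byte PAGE_SIZE, page 0 is read in full; page 1 receives the last
 * 904 bytes, has bytes 904..4095 cleared by zero_user(), and tailsz is
 * set to 904; any remaining pages are dropped rather than held hostage.
 */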
2891
2892 static int
2893 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2894                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2895 {
2896         struct cifs_readdata *rdata;
2897         unsigned int npages, rsize, credits;
2898         size_t cur_len;
2899         int rc;
2900         pid_t pid;
2901         struct TCP_Server_Info *server;
2902
2903         server = tlink_tcon(open_file->tlink)->ses->server;
2904
2905         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2906                 pid = open_file->pid;
2907         else
2908                 pid = current->tgid;
2909
2910         do {
2911                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2912                                                    &rsize, &credits);
2913                 if (rc)
2914                         break;
2915
2916                 cur_len = min_t(const size_t, len, rsize);
2917                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2918
2919                 /* allocate a readdata struct */
2920                 rdata = cifs_readdata_alloc(npages,
2921                                             cifs_uncached_readv_complete);
2922                 if (!rdata) {
2923                         add_credits_and_wake_if(server, credits, 0);
2924                         rc = -ENOMEM;
2925                         break;
2926                 }
2927
2928                 rc = cifs_read_allocate_pages(rdata, npages);
2929                 if (rc)
2930                         goto error;
2931
2932                 rdata->cfile = cifsFileInfo_get(open_file);
2933                 rdata->nr_pages = npages;
2934                 rdata->offset = offset;
2935                 rdata->bytes = cur_len;
2936                 rdata->pid = pid;
2937                 rdata->pagesz = PAGE_SIZE;
2938                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2939                 rdata->credits = credits;
2940
2941                 if (!rdata->cfile->invalidHandle ||
2942                     !cifs_reopen_file(rdata->cfile, true))
2943                         rc = server->ops->async_readv(rdata);
2944 error:
2945                 if (rc) {
2946                         add_credits_and_wake_if(server, rdata->credits, 0);
2947                         kref_put(&rdata->refcount,
2948                                  cifs_uncached_readdata_release);
2949                         if (rc == -EAGAIN)
2950                                 continue;
2951                         break;
2952                 }
2953
2954                 list_add_tail(&rdata->list, rdata_list);
2955                 offset += cur_len;
2956                 len -= cur_len;
2957         } while (len > 0);
2958
2959         return rc;
2960 }
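
/*
 * Editor's worked example (illustration only): a 1 MiB uncached read
 * against a negotiated rsize of 64 KiB is sliced by the loop above into
 * sixteen rdata requests of sixteen 4 KiB pages each, every slice gated
 * through wait_mtu_credits() before server->ops->async_readv() runs.
 */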
2961
2962 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2963 {
2964         struct file *file = iocb->ki_filp;
2965         ssize_t rc;
2966         size_t len;
2967         ssize_t total_read = 0;
2968         loff_t offset = iocb->ki_pos;
2969         struct cifs_sb_info *cifs_sb;
2970         struct cifs_tcon *tcon;
2971         struct cifsFileInfo *open_file;
2972         struct cifs_readdata *rdata, *tmp;
2973         struct list_head rdata_list;
2974
2975         len = iov_iter_count(to);
2976         if (!len)
2977                 return 0;
2978
2979         INIT_LIST_HEAD(&rdata_list);
2980         cifs_sb = CIFS_FILE_SB(file);
2981         open_file = file->private_data;
2982         tcon = tlink_tcon(open_file->tlink);
2983
2984         if (!tcon->ses->server->ops->async_readv)
2985                 return -ENOSYS;
2986
2987         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2988                 cifs_dbg(FYI, "attempting read on write only file instance\n");
2989
2990         rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
2991
2992         /* if at least one read request was sent successfully, reset rc */
2993         if (!list_empty(&rdata_list))
2994                 rc = 0;
2995
2996         len = iov_iter_count(to);
2997         /* the loop below should proceed in the order of increasing offsets */
2998 again:
2999         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3000                 if (!rc) {
3001                         /* FIXME: freezable sleep too? */
3002                         rc = wait_for_completion_killable(&rdata->done);
3003                         if (rc)
3004                                 rc = -EINTR;
3005                         else if (rdata->result == -EAGAIN) {
3006                                 /* resend call if it's a retryable error */
3007                                 struct list_head tmp_list;
3008                                 unsigned int got_bytes = rdata->got_bytes;
3009
3010                                 list_del_init(&rdata->list);
3011                                 INIT_LIST_HEAD(&tmp_list);
3012
3013                                 /*
3014                                  * Got part of the data and then a
3015                                  * reconnect happened -- fill the buffer
3016                                  * and continue reading.
3017                                  */
3018                                 if (got_bytes && got_bytes < rdata->bytes) {
3019                                         rc = cifs_readdata_to_iov(rdata, to);
3020                                         if (rc) {
3021                                                 kref_put(&rdata->refcount,
3022                                                 cifs_uncached_readdata_release);
3023                                                 continue;
3024                                         }
3025                                 }
3026
3027                                 rc = cifs_send_async_read(
3028                                                 rdata->offset + got_bytes,
3029                                                 rdata->bytes - got_bytes,
3030                                                 rdata->cfile, cifs_sb,
3031                                                 &tmp_list);
3032
3033                                 list_splice(&tmp_list, &rdata_list);
3034
3035                                 kref_put(&rdata->refcount,
3036                                          cifs_uncached_readdata_release);
3037                                 goto again;
3038                         } else if (rdata->result)
3039                                 rc = rdata->result;
3040                         else
3041                                 rc = cifs_readdata_to_iov(rdata, to);
3042
3043                         /* if there was a short read -- discard anything left */
3044                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3045                                 rc = -ENODATA;
3046                 }
3047                 list_del_init(&rdata->list);
3048                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3049         }
3050
3051         total_read = len - iov_iter_count(to);
3052
3053         cifs_stats_bytes_read(tcon, total_read);
3054
3055         /* mask nodata case */
3056         if (rc == -ENODATA)
3057                 rc = 0;
3058
3059         if (total_read) {
3060                 iocb->ki_pos += total_read;
3061                 return total_read;
3062         }
3063         return rc;
3064 }
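
/*
 * Editor's note (illustration only): if a request that wanted 64 KiB got
 * only 16 KiB before a reconnect (-EAGAIN), the loop above first drains
 * those 16 KiB into the iterator, then resubmits the remaining 48 KiB
 * from rdata->offset + got_bytes and rescans rdata_list from "again:".
 */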
3065
3066 ssize_t
3067 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3068 {
3069         struct inode *inode = file_inode(iocb->ki_filp);
3070         struct cifsInodeInfo *cinode = CIFS_I(inode);
3071         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3072         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3073                                                 iocb->ki_filp->private_data;
3074         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3075         int rc = -EACCES;
3076
3077         /*
3078          * In strict cache mode we need to read from the server all the time
3079          * if we don't have a level II oplock, because the server can delay an
3080          * mtime change, so we can't decide whether to invalidate the inode.
3081          * Page reading can also fail if there are mandatory locks on pages
3082          * affected by this read but not on the region from pos to
3083          * pos+len-1.
3084          */
3085         if (!CIFS_CACHE_READ(cinode))
3086                 return cifs_user_readv(iocb, to);
3087
3088         if (cap_unix(tcon->ses) &&
3089             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3090             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3091                 return generic_file_read_iter(iocb, to);
3092
3093         /*
3094          * We need to hold the sem to be sure nobody modifies lock list
3095          * with a brlock that prevents reading.
3096          */
3097         down_read(&cinode->lock_sem);
3098         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3099                                      tcon->ses->server->vals->shared_lock_type,
3100                                      NULL, CIFS_READ_OP))
3101                 rc = generic_file_read_iter(iocb, to);
3102         up_read(&cinode->lock_sem);
3103         return rc;
3104 }
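
/*
 * Editor's summary (illustration only) of the decision above: without a
 * read oplock, go uncached via cifs_user_readv(); with POSIX fcntl lock
 * semantics, trust generic_file_read_iter(); otherwise read through the
 * cache only after checking, under lock_sem, that no mandatory brlock
 * conflicts with [pos, pos + len).
 */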
3105
3106 static ssize_t
3107 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3108 {
3109         int rc = -EACCES;
3110         unsigned int bytes_read = 0;
3111         unsigned int total_read;
3112         unsigned int current_read_size;
3113         unsigned int rsize;
3114         struct cifs_sb_info *cifs_sb;
3115         struct cifs_tcon *tcon;
3116         struct TCP_Server_Info *server;
3117         unsigned int xid;
3118         char *cur_offset;
3119         struct cifsFileInfo *open_file;
3120         struct cifs_io_parms io_parms;
3121         int buf_type = CIFS_NO_BUFFER;
3122         __u32 pid;
3123
3124         xid = get_xid();
3125         cifs_sb = CIFS_FILE_SB(file);
3126
3127         /* FIXME: set up handlers for larger reads and/or convert to async */
3128         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3129
3130         if (file->private_data == NULL) {
3131                 rc = -EBADF;
3132                 free_xid(xid);
3133                 return rc;
3134         }
3135         open_file = file->private_data;
3136         tcon = tlink_tcon(open_file->tlink);
3137         server = tcon->ses->server;
3138
3139         if (!server->ops->sync_read) {
3140                 free_xid(xid);
3141                 return -ENOSYS;
3142         }
3143
3144         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3145                 pid = open_file->pid;
3146         else
3147                 pid = current->tgid;
3148
3149         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3150                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3151
3152         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3153              total_read += bytes_read, cur_offset += bytes_read) {
3154                 do {
3155                         current_read_size = min_t(uint, read_size - total_read,
3156                                                   rsize);
3157                         /*
3158                          * For Windows ME and 9x we do not want to request more
3159                          * than the server negotiated, since it will then refuse
3160                          * the read.
3161                          */
3162                         if ((tcon->ses) && !(tcon->ses->capabilities &
3163                                 tcon->ses->server->vals->cap_large_files)) {
3164                                 current_read_size = min_t(uint,
3165                                         current_read_size, CIFSMaxBufSize);
3166                         }
3167                         if (open_file->invalidHandle) {
3168                                 rc = cifs_reopen_file(open_file, true);
3169                                 if (rc != 0)
3170                                         break;
3171                         }
3172                         io_parms.pid = pid;
3173                         io_parms.tcon = tcon;
3174                         io_parms.offset = *offset;
3175                         io_parms.length = current_read_size;
3176                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3177                                                     &bytes_read, &cur_offset,
3178                                                     &buf_type);
3179                 } while (rc == -EAGAIN);
3180
3181                 if (rc || (bytes_read == 0)) {
3182                         if (total_read) {
3183                                 break;
3184                         } else {
3185                                 free_xid(xid);
3186                                 return rc;
3187                         }
3188                 } else {
3189                         cifs_stats_bytes_read(tcon, total_read);
3190                         *offset += bytes_read;
3191                 }
3192         }
3193         free_xid(xid);
3194         return total_read;
3195 }
3196
3197 /*
3198  * If the page is mmap'ed into a process' page tables, then we need to make
3199  * sure that it doesn't change while being written back.
3200  */
3201 static int
3202 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3203 {
3204         struct page *page = vmf->page;
3205
3206         lock_page(page);
3207         return VM_FAULT_LOCKED;
3208 }
3209
3210 static const struct vm_operations_struct cifs_file_vm_ops = {
3211         .fault = filemap_fault,
3212         .map_pages = filemap_map_pages,
3213         .page_mkwrite = cifs_page_mkwrite,
3214 };
3215
3216 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3217 {
3218         int rc, xid;
3219         struct inode *inode = file_inode(file);
3220
3221         xid = get_xid();
3222
3223         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3224                 rc = cifs_zap_mapping(inode);
3225                 if (rc)
3226                         goto out;
3227         }
3228
3229         rc = generic_file_mmap(file, vma);
3230         if (rc == 0)
3231                 vma->vm_ops = &cifs_file_vm_ops;
3232 out:
3233         free_xid(xid);
3234         return rc;
3234 }
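
/*
 * Editor's note: every get_xid() must be balanced by a free_xid() on all
 * return paths, which is why the early error above funnels through the
 * "out" label. A minimal sketch of the pattern, with a hypothetical
 * helper standing in for the real work:
 */
#if 0	/* illustration only, excluded from compilation */
	unsigned int xid = get_xid();
	int rc;

	rc = do_cifs_operation();	/* hypothetical helper */
	free_xid(xid);
	return rc;
#endif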
3235
3236 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3237 {
3238         int rc, xid;
3239
3240         xid = get_xid();
3241         rc = cifs_revalidate_file(file);
3242         if (rc) {
3243                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3244                          rc);
3245                 free_xid(xid);
3246                 return rc;
3247         }
3248         rc = generic_file_mmap(file, vma);
3249         if (rc == 0)
3250                 vma->vm_ops = &cifs_file_vm_ops;
3251         free_xid(xid);
3252         return rc;
3253 }
3254
3255 static void
3256 cifs_readv_complete(struct work_struct *work)
3257 {
3258         unsigned int i, got_bytes;
3259         struct cifs_readdata *rdata = container_of(work,
3260                                                 struct cifs_readdata, work);
3261
3262         got_bytes = rdata->got_bytes;
3263         for (i = 0; i < rdata->nr_pages; i++) {
3264                 struct page *page = rdata->pages[i];
3265
3266                 lru_cache_add_file(page);
3267
3268                 if (rdata->result == 0 ||
3269                     (rdata->result == -EAGAIN && got_bytes)) {
3270                         flush_dcache_page(page);
3271                         SetPageUptodate(page);
3272                 }
3273
3274                 unlock_page(page);
3275
3276                 if (rdata->result == 0 ||
3277                     (rdata->result == -EAGAIN && got_bytes))
3278                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3279
3280                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3281
3282                 put_page(page);
3283                 rdata->pages[i] = NULL;
3284         }
3285         kref_put(&rdata->refcount, cifs_readdata_release);
3286 }
3287
3288 static int
3289 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3290                         struct cifs_readdata *rdata, unsigned int len)
3291 {
3292         int result = 0;
3293         unsigned int i;
3294         u64 eof;
3295         pgoff_t eof_index;
3296         unsigned int nr_pages = rdata->nr_pages;
3297
3298         /* determine the eof that the server (probably) has */
3299         eof = CIFS_I(rdata->mapping->host)->server_eof;
3300         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3301         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3302
3303         rdata->got_bytes = 0;
3304         rdata->tailsz = PAGE_SIZE;
3305         for (i = 0; i < nr_pages; i++) {
3306                 struct page *page = rdata->pages[i];
3307                 size_t n = PAGE_SIZE;
3308
3309                 if (len >= PAGE_SIZE) {
3310                         len -= PAGE_SIZE;
3311                 } else if (len > 0) {
3312                         /* enough for partial page, fill and zero the rest */
3313                         zero_user(page, len, PAGE_SIZE - len);
3314                         n = rdata->tailsz = len;
3315                         len = 0;
3316                 } else if (page->index > eof_index) {
3317                         /*
3318                          * The VFS will not try to do readahead past the
3319                          * i_size, but it's possible that we have outstanding
3320                          * writes with gaps in the middle and the i_size hasn't
3321                          * caught up yet. Populate those with zeroed out pages
3322                          * to prevent the VFS from repeatedly attempting to
3323                          * fill them until the writes are flushed.
3324                          */
3325                         zero_user(page, 0, PAGE_SIZE);
3326                         lru_cache_add_file(page);
3327                         flush_dcache_page(page);
3328                         SetPageUptodate(page);
3329                         unlock_page(page);
3330                         put_page(page);
3331                         rdata->pages[i] = NULL;
3332                         rdata->nr_pages--;
3333                         continue;
3334                 } else {
3335                         /* no need to hold page hostage */
3336                         lru_cache_add_file(page);
3337                         unlock_page(page);
3338                         put_page(page);
3339                         rdata->pages[i] = NULL;
3340                         rdata->nr_pages--;
3341                         continue;
3342                 }
3343
3344                 result = cifs_read_page_from_socket(server, page, n);
3345                 if (result < 0)
3346                         break;
3347
3348                 rdata->got_bytes += result;
3349         }
3350
3351         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3352                                                 rdata->got_bytes : result;
3353 }
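
/*
 * Editor's worked example (illustration only): assume a readahead batch
 * starting at offset 0, len == 9000, 4096-byte pages, and server_eof ==
 * 10000 (so eof_index == 2). Pages 0 and 1 are filled from the socket,
 * page 2 takes the 808-byte tail with the rest zeroed, and any page with
 * index > 2 is zero-filled and marked uptodate without touching the wire.
 */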
3354
3355 static int
3356 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3357                     unsigned int rsize, struct list_head *tmplist,
3358                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3359 {
3360         struct page *page, *tpage;
3361         unsigned int expected_index;
3362         int rc;
3363         gfp_t gfp = readahead_gfp_mask(mapping);
3364
3365         INIT_LIST_HEAD(tmplist);
3366
3367         page = list_entry(page_list->prev, struct page, lru);
3368
3369         /*
3370          * Lock the page and put it in the cache. Since no one else
3371          * should have access to this page, we're safe to simply set
3372          * PG_locked without checking it first.
3373          */
3374         __SetPageLocked(page);
3375         rc = add_to_page_cache_locked(page, mapping,
3376                                       page->index, gfp);
3377
3378         /* give up if we can't stick it in the cache */
3379         if (rc) {
3380                 __ClearPageLocked(page);
3381                 return rc;
3382         }
3383
3384         /* move first page to the tmplist */
3385         *offset = (loff_t)page->index << PAGE_SHIFT;
3386         *bytes = PAGE_SIZE;
3387         *nr_pages = 1;
3388         list_move_tail(&page->lru, tmplist);
3389
3390         /* now try and add more pages onto the request */
3391         expected_index = page->index + 1;
3392         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3393                 /* discontinuity ? */
3394                 if (page->index != expected_index)
3395                         break;
3396
3397                 /* would this page push the read over the rsize? */
3398                 if (*bytes + PAGE_SIZE > rsize)
3399                         break;
3400
3401                 __SetPageLocked(page);
3402                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3403                         __ClearPageLocked(page);
3404                         break;
3405                 }
3406                 list_move_tail(&page->lru, tmplist);
3407                 (*bytes) += PAGE_SIZE;
3408                 expected_index++;
3409                 (*nr_pages)++;
3410         }
3411         return rc;
3412 }
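
/*
 * Editor's worked example (illustration only): with 4096-byte pages and
 * rsize == 16384, a page_list covering indexes 7,6,5,4,3 (declining, as
 * the VFS supplies them) produces one request of four pages: offset
 * 12288, bytes 16384, covering indexes 3..6. Index 7 would push the
 * request past rsize and is left for the next loop iteration.
 */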
3413
3414 static int cifs_readpages(struct file *file, struct address_space *mapping,
3415         struct list_head *page_list, unsigned num_pages)
3416 {
3417         int rc;
3418         struct list_head tmplist;
3419         struct cifsFileInfo *open_file = file->private_data;
3420         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3421         struct TCP_Server_Info *server;
3422         pid_t pid;
3423
3424         /*
3425          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3426          * immediately if the cookie is negative.
3427          *
3428          * After this point, every page in the list might have PG_fscache set,
3429          * so we will need to clean that up off of every page we don't use.
3430          */
3431         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3432                                          &num_pages);
3433         if (rc == 0)
3434                 return rc;
3435
3436         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3437                 pid = open_file->pid;
3438         else
3439                 pid = current->tgid;
3440
3441         rc = 0;
3442         server = tlink_tcon(open_file->tlink)->ses->server;
3443
3444         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3445                  __func__, file, mapping, num_pages);
3446
3447         /*
3448          * Start with the page at end of list and move it to private
3449          * list. Do the same with any following pages until we hit
3450          * the rsize limit, hit an index discontinuity, or run out of
3451          * pages. Issue the async read and then start the loop again
3452          * until the list is empty.
3453          *
3454          * Note that list order is important. The page_list is in
3455          * the order of declining indexes. When we put the pages in
3456          * the rdata->pages, then we want them in increasing order.
3457          */
3458         while (!list_empty(page_list)) {
3459                 unsigned int i, nr_pages, bytes, rsize;
3460                 loff_t offset;
3461                 struct page *page, *tpage;
3462                 struct cifs_readdata *rdata;
3463                 unsigned credits;
3464
3465                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3466                                                    &rsize, &credits);
3467                 if (rc)
3468                         break;
3469
3470                 /*
3471                  * Give up immediately if rsize is too small to read an entire
3472                  * page. The VFS will fall back to readpage. However, we should
3473                  * never reach this point, since we set ra_pages to 0 when the
3474                  * rsize is smaller than a cache page.
3475                  */
3476                 if (unlikely(rsize < PAGE_SIZE)) {
3477                         add_credits_and_wake_if(server, credits, 0);
3478                         return 0;
3479                 }
3480
3481                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3482                                          &nr_pages, &offset, &bytes);
3483                 if (rc) {
3484                         add_credits_and_wake_if(server, credits, 0);
3485                         break;
3486                 }
3487
3488                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3489                 if (!rdata) {
3490                         /* best to give up if we're out of mem */
3491                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3492                                 list_del(&page->lru);
3493                                 lru_cache_add_file(page);
3494                                 unlock_page(page);
3495                                 put_page(page);
3496                         }
3497                         rc = -ENOMEM;
3498                         add_credits_and_wake_if(server, credits, 0);
3499                         break;
3500                 }
3501
3502                 rdata->cfile = cifsFileInfo_get(open_file);
3503                 rdata->mapping = mapping;
3504                 rdata->offset = offset;
3505                 rdata->bytes = bytes;
3506                 rdata->pid = pid;
3507                 rdata->pagesz = PAGE_SIZE;
3508                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3509                 rdata->credits = credits;
3510
3511                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3512                         list_del(&page->lru);
3513                         rdata->pages[rdata->nr_pages++] = page;
3514                 }
3515
3516                 if (!rdata->cfile->invalidHandle ||
3517                     !cifs_reopen_file(rdata->cfile, true))
3518                         rc = server->ops->async_readv(rdata);
3519                 if (rc) {
3520                         add_credits_and_wake_if(server, rdata->credits, 0);
3521                         for (i = 0; i < rdata->nr_pages; i++) {
3522                                 page = rdata->pages[i];
3523                                 lru_cache_add_file(page);
3524                                 unlock_page(page);
3525                                 put_page(page);
3526                         }
3527                         /* Fall back to readpage() in error/reconnect cases */
3528                         kref_put(&rdata->refcount, cifs_readdata_release);
3529                         break;
3530                 }
3531
3532                 kref_put(&rdata->refcount, cifs_readdata_release);
3533         }
3534
3535         /* Any pages that have been shown to fscache but didn't get added to
3536          * the pagecache must be uncached before they get returned to the
3537          * allocator.
3538          */
3539         cifs_fscache_readpages_cancel(mapping->host, page_list);
3540         return rc;
3541 }
3542
3543 /*
3544  * cifs_readpage_worker must be called with the page pinned
3545  */
3546 static int cifs_readpage_worker(struct file *file, struct page *page,
3547         loff_t *poffset)
3548 {
3549         char *read_data;
3550         int rc;
3551
3552         /* Is the page cached? */
3553         rc = cifs_readpage_from_fscache(file_inode(file), page);
3554         if (rc == 0)
3555                 goto read_complete;
3556
3557         read_data = kmap(page);
3558         /* for reads over a certain size we could initiate async read ahead */
3559
3560         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3561
3562         if (rc < 0)
3563                 goto io_error;
3564         else
3565                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3566
3567         file_inode(file)->i_atime =
3568                 current_time(file_inode(file));
3569
3570         if (PAGE_SIZE > rc)
3571                 memset(read_data + rc, 0, PAGE_SIZE - rc);
3572
3573         flush_dcache_page(page);
3574         SetPageUptodate(page);
3575
3576         /* send this page to the cache */
3577         cifs_readpage_to_fscache(file_inode(file), page);
3578
3579         rc = 0;
3580
3581 io_error:
3582         kunmap(page);
3583         unlock_page(page);
3584
3585 read_complete:
3586         return rc;
3587 }
3588
3589 static int cifs_readpage(struct file *file, struct page *page)
3590 {
3591         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3592         int rc = -EACCES;
3593         unsigned int xid;
3594
3595         xid = get_xid();
3596
3597         if (file->private_data == NULL) {
3598                 rc = -EBADF;
3599                 free_xid(xid);
3600                 return rc;
3601         }
3602
3603         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3604                  page, (int)offset, (int)offset);
3605
3606         rc = cifs_readpage_worker(file, page, &offset);
3607
3608         free_xid(xid);
3609         return rc;
3610 }
3611
3612 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3613 {
3614         struct cifsFileInfo *open_file;
3615
3616         spin_lock(&cifs_file_list_lock);
3617         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3618                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3619                         spin_unlock(&cifs_file_list_lock);
3620                         return 1;
3621                 }
3622         }
3623         spin_unlock(&cifs_file_list_lock);
3624         return 0;
3625 }
3626
3627 /* We do not want to update the file size from the server for inodes
3628    open for write, to avoid races with writepage extending the file.
3629    In the future we could consider allowing a refresh of the inode
3630    only on increases in the file size, but this is tricky to do
3631    without racing with writebehind page caching in the current
3632    Linux kernel design. */
3633 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3634 {
3635         if (!cifsInode)
3636                 return true;
3637
3638         if (is_inode_writable(cifsInode)) {
3639                 /* This inode is open for write at least once */
3640                 struct cifs_sb_info *cifs_sb;
3641
3642                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3643                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3644                         /* since there is no page cache to corrupt on
3645                            direct I/O, we can change the size safely */
3646                         return true;
3647                 }
3648
3649                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3650                         return true;
3651
3652                 return false;
3653         } else
3654                 return true;
3655 }
3656
3657 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3658                         loff_t pos, unsigned len, unsigned flags,
3659                         struct page **pagep, void **fsdata)
3660 {
3661         int oncethru = 0;
3662         pgoff_t index = pos >> PAGE_SHIFT;
3663         loff_t offset = pos & (PAGE_SIZE - 1);
3664         loff_t page_start = pos & PAGE_MASK;
3665         loff_t i_size;
3666         struct page *page;
3667         int rc = 0;
3668
3669         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3670
3671 start:
3672         page = grab_cache_page_write_begin(mapping, index, flags);
3673         if (!page) {
3674                 rc = -ENOMEM;
3675                 goto out;
3676         }
3677
3678         if (PageUptodate(page))
3679                 goto out;
3680
3681         /*
3682          * If we write a full page it will be up to date, no need to read from
3683          * the server. If the write is short, we'll end up doing a sync write
3684          * instead.
3685          */
3686         if (len == PAGE_SIZE)
3687                 goto out;
3688
3689         /*
3690          * optimize away the read when we have an oplock, and we're not
3691          * expecting to use any of the data we'd be reading in. That
3692          * is, when the page lies beyond the EOF, or straddles the EOF
3693          * and the write will cover all of the existing data.
3694          */
3695         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3696                 i_size = i_size_read(mapping->host);
3697                 if (page_start >= i_size ||
3698                     (offset == 0 && (pos + len) >= i_size)) {
3699                         zero_user_segments(page, 0, offset,
3700                                            offset + len,
3701                                            PAGE_SIZE);
3702                         /*
3703                          * PageChecked means that the parts of the page
3704                          * to which we're not writing are considered up
3705                          * to date. Once the data is copied to the
3706                          * page, it can be set uptodate.
3707                          */
3708                         SetPageChecked(page);
3709                         goto out;
3710                 }
3711         }
3712
3713         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3714                 /*
3715                  * might as well read a page, it is fast enough. If we get
3716                  * an error, we don't need to return it. cifs_write_end will
3717                  * do a sync write instead since PG_uptodate isn't set.
3718                  */
3719                 cifs_readpage_worker(file, page, &page_start);
3720                 put_page(page);
3721                 oncethru = 1;
3722                 goto start;
3723         } else {
3724                 /* we could try using another file handle if there is one,
3725                    but how would we lock it to prevent a close of that handle
3726                    racing with this read? In any case, this page will be
3727                    written out by write_end, so this is fine */
3728         }
3729 out:
3730         *pagep = page;
3731         return rc;
3732 }
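
/*
 * Editor's worked example (illustration only): with a read oplock and
 * i_size == 16384, a 1000-byte write at pos == 20480 starts beyond EOF,
 * so the page is zeroed around the written range and flagged PageChecked
 * with no read. A 1000-byte write at pos == 4100 lands inside existing
 * data, so the one-shot cifs_readpage_worker() pass fills the page first.
 */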
3733
3734 static int cifs_release_page(struct page *page, gfp_t gfp)
3735 {
3736         if (PagePrivate(page))
3737                 return 0;
3738
3739         return cifs_fscache_release_page(page, gfp);
3740 }
3741
3742 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3743                                  unsigned int length)
3744 {
3745         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3746
3747         if (offset == 0 && length == PAGE_SIZE)
3748                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3749 }
3750
3751 static int cifs_launder_page(struct page *page)
3752 {
3753         int rc = 0;
3754         loff_t range_start = page_offset(page);
3755         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3756         struct writeback_control wbc = {
3757                 .sync_mode = WB_SYNC_ALL,
3758                 .nr_to_write = 0,
3759                 .range_start = range_start,
3760                 .range_end = range_end,
3761         };
3762
3763         cifs_dbg(FYI, "Launder page: %p\n", page);
3764
3765         if (clear_page_dirty_for_io(page))
3766                 rc = cifs_writepage_locked(page, &wbc);
3767
3768         cifs_fscache_invalidate_page(page, page->mapping->host);
3769         return rc;
3770 }
3771
3772 void cifs_oplock_break(struct work_struct *work)
3773 {
3774         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3775                                                   oplock_break);
3776         struct inode *inode = d_inode(cfile->dentry);
3777         struct cifsInodeInfo *cinode = CIFS_I(inode);
3778         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3779         struct TCP_Server_Info *server = tcon->ses->server;
3780         int rc = 0;
3781
3782         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3783                         TASK_UNINTERRUPTIBLE);
3784
3785         server->ops->downgrade_oplock(server, cinode,
3786                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3787
3788         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3789                                                 cifs_has_mand_locks(cinode)) {
3790                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3791                          inode);
3792                 cinode->oplock = 0;
3793         }
3794
3795         if (inode && S_ISREG(inode->i_mode)) {
3796                 if (CIFS_CACHE_READ(cinode))
3797                         break_lease(inode, O_RDONLY);
3798                 else
3799                         break_lease(inode, O_WRONLY);
3800                 rc = filemap_fdatawrite(inode->i_mapping);
3801                 if (!CIFS_CACHE_READ(cinode)) {
3802                         rc = filemap_fdatawait(inode->i_mapping);
3803                         mapping_set_error(inode->i_mapping, rc);
3804                         cifs_zap_mapping(inode);
3805                 }
3806                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3807         }
3808
3809         rc = cifs_push_locks(cfile);
3810         if (rc)
3811                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3812
3813         /*
3814          * Releasing a stale oplock after a recent reconnect of the SMB session,
3815          * using a now-incorrect file handle, is not a data integrity issue, but
3816          * do not bother sending an oplock release if the session is still
3817          * disconnected, since the server has already released the oplock.
3818          */
3819         if (!cfile->oplock_break_cancelled) {
3820                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3821                                                              cinode);
3822                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3823         }
3824         cifs_done_oplock_break(cinode);
3825 }
3826
3827 /*
3828  * The presence of cifs_direct_io() in the address space ops vector
3829  * allows open() O_DIRECT flags which would have failed otherwise.
3830  *
3831  * In the non-cached mode (mount with cache=none), we shunt off direct
3832  * read and write requests, so this method should never be called.
3833  *
3834  * Direct I/O is not yet supported in the cached mode.
3835  */
3836 static ssize_t
3837 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3838 {
3839         /*
3840          * FIXME
3841          * Eventually need to support direct IO for non forcedirectio mounts
3842          */
3843         return -EINVAL;
3844 }
3845
3846
3847 const struct address_space_operations cifs_addr_ops = {
3848         .readpage = cifs_readpage,
3849         .readpages = cifs_readpages,
3850         .writepage = cifs_writepage,
3851         .writepages = cifs_writepages,
3852         .write_begin = cifs_write_begin,
3853         .write_end = cifs_write_end,
3854         .set_page_dirty = __set_page_dirty_nobuffers,
3855         .releasepage = cifs_release_page,
3856         .direct_IO = cifs_direct_io,
3857         .invalidatepage = cifs_invalidate_page,
3858         .launder_page = cifs_launder_page,
3859 };
3860
3861 /*
3862  * cifs_readpages requires the server to support a buffer large enough to
3863  * contain the header plus one complete page of data.  Otherwise, we need
3864  * to leave cifs_readpages out of the address space operations.
3865  */
3866 const struct address_space_operations cifs_addr_ops_smallbuf = {
3867         .readpage = cifs_readpage,
3868         .writepage = cifs_writepage,
3869         .writepages = cifs_writepages,
3870         .write_begin = cifs_write_begin,
3871         .write_end = cifs_write_end,
3872         .set_page_dirty = __set_page_dirty_nobuffers,
3873         .releasepage = cifs_release_page,
3874         .invalidatepage = cifs_invalidate_page,
3875         .launder_page = cifs_launder_page,
3876 };