Merge tag 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford...
[cascardo/linux.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114                         struct super_block *sb, int mode, unsigned int f_flags,
115                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
116 {
117         int rc;
118         FILE_UNIX_BASIC_INFO *presp_data;
119         __u32 posix_flags = 0;
120         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121         struct cifs_fattr fattr;
122         struct tcon_link *tlink;
123         struct cifs_tcon *tcon;
124
125         cifs_dbg(FYI, "posix open %s\n", full_path);
126
127         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128         if (presp_data == NULL)
129                 return -ENOMEM;
130
131         tlink = cifs_sb_tlink(cifs_sb);
132         if (IS_ERR(tlink)) {
133                 rc = PTR_ERR(tlink);
134                 goto posix_open_ret;
135         }
136
137         tcon = tlink_tcon(tlink);
138         mode &= ~current_umask();
139
140         posix_flags = cifs_posix_convert_flags(f_flags);
141         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142                              poplock, full_path, cifs_sb->local_nls,
143                              cifs_remap(cifs_sb));
144         cifs_put_tlink(tlink);
145
146         if (rc)
147                 goto posix_open_ret;
148
149         if (presp_data->Type == cpu_to_le32(-1))
150                 goto posix_open_ret; /* open ok, caller does qpathinfo */
151
152         if (!pinode)
153                 goto posix_open_ret; /* caller does not need info */
154
155         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
156
157         /* get new inode and set it up */
158         if (*pinode == NULL) {
159                 cifs_fill_uniqueid(sb, &fattr);
160                 *pinode = cifs_iget(sb, &fattr);
161                 if (!*pinode) {
162                         rc = -ENOMEM;
163                         goto posix_open_ret;
164                 }
165         } else {
166                 cifs_fattr_to_inode(*pinode, &fattr);
167         }
168
169 posix_open_ret:
170         kfree(presp_data);
171         return rc;
172 }
173
174 static int
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177              struct cifs_fid *fid, unsigned int xid)
178 {
179         int rc;
180         int desired_access;
181         int disposition;
182         int create_options = CREATE_NOT_DIR;
183         FILE_ALL_INFO *buf;
184         struct TCP_Server_Info *server = tcon->ses->server;
185         struct cifs_open_parms oparms;
186
187         if (!server->ops->open)
188                 return -ENOSYS;
189
190         desired_access = cifs_convert_flags(f_flags);
191
192 /*********************************************************************
193  *  open flag mapping table:
194  *
195  *      POSIX Flag            CIFS Disposition
196  *      ----------            ----------------
197  *      O_CREAT               FILE_OPEN_IF
198  *      O_CREAT | O_EXCL      FILE_CREATE
199  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
200  *      O_TRUNC               FILE_OVERWRITE
201  *      none of the above     FILE_OPEN
202  *
203  *      Note that there is not a direct match between disposition
204  *      FILE_SUPERSEDE (ie create whether or not file exists although
205  *      O_CREAT | O_TRUNC is similar but truncates the existing
206  *      file rather than creating a new file as FILE_SUPERSEDE does
207  *      (which uses the attributes / metadata passed in on open call)
208  *?
209  *?  O_SYNC is a reasonable match to CIFS writethrough flag
210  *?  and the read write flags match reasonably.  O_LARGEFILE
211  *?  is irrelevant because largefile support is always used
212  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214  *********************************************************************/
215
216         disposition = cifs_get_disposition(f_flags);
217
218         /* BB pass O_SYNC flag through on file attributes .. BB */
219
220         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
221         if (!buf)
222                 return -ENOMEM;
223
224         if (backup_cred(cifs_sb))
225                 create_options |= CREATE_OPEN_BACKUP_INTENT;
226
227         oparms.tcon = tcon;
228         oparms.cifs_sb = cifs_sb;
229         oparms.desired_access = desired_access;
230         oparms.create_options = create_options;
231         oparms.disposition = disposition;
232         oparms.path = full_path;
233         oparms.fid = fid;
234         oparms.reconnect = false;
235
236         rc = server->ops->open(xid, &oparms, oplock, buf);
237
238         if (rc)
239                 goto out;
240
241         if (tcon->unix_ext)
242                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
243                                               xid);
244         else
245                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
246                                          xid, fid);
247
248 out:
249         kfree(buf);
250         return rc;
251 }
252
253 static bool
254 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
255 {
256         struct cifs_fid_locks *cur;
257         bool has_locks = false;
258
259         down_read(&cinode->lock_sem);
260         list_for_each_entry(cur, &cinode->llist, llist) {
261                 if (!list_empty(&cur->locks)) {
262                         has_locks = true;
263                         break;
264                 }
265         }
266         up_read(&cinode->lock_sem);
267         return has_locks;
268 }
269
270 struct cifsFileInfo *
271 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
272                   struct tcon_link *tlink, __u32 oplock)
273 {
274         struct dentry *dentry = file_dentry(file);
275         struct inode *inode = d_inode(dentry);
276         struct cifsInodeInfo *cinode = CIFS_I(inode);
277         struct cifsFileInfo *cfile;
278         struct cifs_fid_locks *fdlocks;
279         struct cifs_tcon *tcon = tlink_tcon(tlink);
280         struct TCP_Server_Info *server = tcon->ses->server;
281
282         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
283         if (cfile == NULL)
284                 return cfile;
285
286         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
287         if (!fdlocks) {
288                 kfree(cfile);
289                 return NULL;
290         }
291
292         INIT_LIST_HEAD(&fdlocks->locks);
293         fdlocks->cfile = cfile;
294         cfile->llist = fdlocks;
295         down_write(&cinode->lock_sem);
296         list_add(&fdlocks->llist, &cinode->llist);
297         up_write(&cinode->lock_sem);
298
299         cfile->count = 1;
300         cfile->pid = current->tgid;
301         cfile->uid = current_fsuid();
302         cfile->dentry = dget(dentry);
303         cfile->f_flags = file->f_flags;
304         cfile->invalidHandle = false;
305         cfile->tlink = cifs_get_tlink(tlink);
306         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
307         mutex_init(&cfile->fh_mutex);
308
309         cifs_sb_active(inode->i_sb);
310
311         /*
312          * If the server returned a read oplock and we have mandatory brlocks,
313          * set oplock level to None.
314          */
315         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
316                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
317                 oplock = 0;
318         }
319
320         spin_lock(&cifs_file_list_lock);
321         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
322                 oplock = fid->pending_open->oplock;
323         list_del(&fid->pending_open->olist);
324
325         fid->purge_cache = false;
326         server->ops->set_fid(cfile, fid, oplock);
327
328         list_add(&cfile->tlist, &tcon->openFileList);
329         /* if readable file instance put first in list*/
330         if (file->f_mode & FMODE_READ)
331                 list_add(&cfile->flist, &cinode->openFileList);
332         else
333                 list_add_tail(&cfile->flist, &cinode->openFileList);
334         spin_unlock(&cifs_file_list_lock);
335
336         if (fid->purge_cache)
337                 cifs_zap_mapping(inode);
338
339         file->private_data = cfile;
340         return cfile;
341 }
342
343 struct cifsFileInfo *
344 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
345 {
346         spin_lock(&cifs_file_list_lock);
347         cifsFileInfo_get_locked(cifs_file);
348         spin_unlock(&cifs_file_list_lock);
349         return cifs_file;
350 }
351
352 /*
353  * Release a reference on the file private data. This may involve closing
354  * the filehandle out on the server. Must be called without holding
355  * cifs_file_list_lock.
356  */
357 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
358 {
359         struct inode *inode = d_inode(cifs_file->dentry);
360         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
361         struct TCP_Server_Info *server = tcon->ses->server;
362         struct cifsInodeInfo *cifsi = CIFS_I(inode);
363         struct super_block *sb = inode->i_sb;
364         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
365         struct cifsLockInfo *li, *tmp;
366         struct cifs_fid fid;
367         struct cifs_pending_open open;
368         bool oplock_break_cancelled;
369
370         spin_lock(&cifs_file_list_lock);
371         if (--cifs_file->count > 0) {
372                 spin_unlock(&cifs_file_list_lock);
373                 return;
374         }
375
376         if (server->ops->get_lease_key)
377                 server->ops->get_lease_key(inode, &fid);
378
379         /* store open in pending opens to make sure we don't miss lease break */
380         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
381
382         /* remove it from the lists */
383         list_del(&cifs_file->flist);
384         list_del(&cifs_file->tlist);
385
386         if (list_empty(&cifsi->openFileList)) {
387                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
388                          d_inode(cifs_file->dentry));
389                 /*
390                  * In strict cache mode we need invalidate mapping on the last
391                  * close  because it may cause a error when we open this file
392                  * again and get at least level II oplock.
393                  */
394                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
395                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
396                 cifs_set_oplock_level(cifsi, 0);
397         }
398         spin_unlock(&cifs_file_list_lock);
399
400         oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
401
402         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
403                 struct TCP_Server_Info *server = tcon->ses->server;
404                 unsigned int xid;
405
406                 xid = get_xid();
407                 if (server->ops->close)
408                         server->ops->close(xid, tcon, &cifs_file->fid);
409                 _free_xid(xid);
410         }
411
412         if (oplock_break_cancelled)
413                 cifs_done_oplock_break(cifsi);
414
415         cifs_del_pending_open(&open);
416
417         /*
418          * Delete any outstanding lock records. We'll lose them when the file
419          * is closed anyway.
420          */
421         down_write(&cifsi->lock_sem);
422         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
423                 list_del(&li->llist);
424                 cifs_del_lock_waiters(li);
425                 kfree(li);
426         }
427         list_del(&cifs_file->llist->llist);
428         kfree(cifs_file->llist);
429         up_write(&cifsi->lock_sem);
430
431         cifs_put_tlink(cifs_file->tlink);
432         dput(cifs_file->dentry);
433         cifs_sb_deactive(sb);
434         kfree(cifs_file);
435 }
436
437 int cifs_open(struct inode *inode, struct file *file)
438
439 {
440         int rc = -EACCES;
441         unsigned int xid;
442         __u32 oplock;
443         struct cifs_sb_info *cifs_sb;
444         struct TCP_Server_Info *server;
445         struct cifs_tcon *tcon;
446         struct tcon_link *tlink;
447         struct cifsFileInfo *cfile = NULL;
448         char *full_path = NULL;
449         bool posix_open_ok = false;
450         struct cifs_fid fid;
451         struct cifs_pending_open open;
452
453         xid = get_xid();
454
455         cifs_sb = CIFS_SB(inode->i_sb);
456         tlink = cifs_sb_tlink(cifs_sb);
457         if (IS_ERR(tlink)) {
458                 free_xid(xid);
459                 return PTR_ERR(tlink);
460         }
461         tcon = tlink_tcon(tlink);
462         server = tcon->ses->server;
463
464         full_path = build_path_from_dentry(file_dentry(file));
465         if (full_path == NULL) {
466                 rc = -ENOMEM;
467                 goto out;
468         }
469
470         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
471                  inode, file->f_flags, full_path);
472
473         if (file->f_flags & O_DIRECT &&
474             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
475                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
476                         file->f_op = &cifs_file_direct_nobrl_ops;
477                 else
478                         file->f_op = &cifs_file_direct_ops;
479         }
480
481         if (server->oplocks)
482                 oplock = REQ_OPLOCK;
483         else
484                 oplock = 0;
485
486         if (!tcon->broken_posix_open && tcon->unix_ext &&
487             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
488                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
489                 /* can not refresh inode info since size could be stale */
490                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
491                                 cifs_sb->mnt_file_mode /* ignored */,
492                                 file->f_flags, &oplock, &fid.netfid, xid);
493                 if (rc == 0) {
494                         cifs_dbg(FYI, "posix open succeeded\n");
495                         posix_open_ok = true;
496                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
497                         if (tcon->ses->serverNOS)
498                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
499                                          tcon->ses->serverName,
500                                          tcon->ses->serverNOS);
501                         tcon->broken_posix_open = true;
502                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
503                          (rc != -EOPNOTSUPP)) /* path not found or net err */
504                         goto out;
505                 /*
506                  * Else fallthrough to retry open the old way on network i/o
507                  * or DFS errors.
508                  */
509         }
510
511         if (server->ops->get_lease_key)
512                 server->ops->get_lease_key(inode, &fid);
513
514         cifs_add_pending_open(&fid, tlink, &open);
515
516         if (!posix_open_ok) {
517                 if (server->ops->get_lease_key)
518                         server->ops->get_lease_key(inode, &fid);
519
520                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
521                                   file->f_flags, &oplock, &fid, xid);
522                 if (rc) {
523                         cifs_del_pending_open(&open);
524                         goto out;
525                 }
526         }
527
528         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
529         if (cfile == NULL) {
530                 if (server->ops->close)
531                         server->ops->close(xid, tcon, &fid);
532                 cifs_del_pending_open(&open);
533                 rc = -ENOMEM;
534                 goto out;
535         }
536
537         cifs_fscache_set_inode_cookie(inode, file);
538
539         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
540                 /*
541                  * Time to set mode which we can not set earlier due to
542                  * problems creating new read-only files.
543                  */
544                 struct cifs_unix_set_info_args args = {
545                         .mode   = inode->i_mode,
546                         .uid    = INVALID_UID, /* no change */
547                         .gid    = INVALID_GID, /* no change */
548                         .ctime  = NO_CHANGE_64,
549                         .atime  = NO_CHANGE_64,
550                         .mtime  = NO_CHANGE_64,
551                         .device = 0,
552                 };
553                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
554                                        cfile->pid);
555         }
556
557 out:
558         kfree(full_path);
559         free_xid(xid);
560         cifs_put_tlink(tlink);
561         return rc;
562 }
563
564 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
565
566 /*
567  * Try to reacquire byte range locks that were released when session
568  * to server was lost.
569  */
570 static int
571 cifs_relock_file(struct cifsFileInfo *cfile)
572 {
573         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
574         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
575         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
576         int rc = 0;
577
578         down_read(&cinode->lock_sem);
579         if (cinode->can_cache_brlcks) {
580                 /* can cache locks - no need to relock */
581                 up_read(&cinode->lock_sem);
582                 return rc;
583         }
584
585         if (cap_unix(tcon->ses) &&
586             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
587             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
588                 rc = cifs_push_posix_locks(cfile);
589         else
590                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
591
592         up_read(&cinode->lock_sem);
593         return rc;
594 }
595
596 static int
597 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
598 {
599         int rc = -EACCES;
600         unsigned int xid;
601         __u32 oplock;
602         struct cifs_sb_info *cifs_sb;
603         struct cifs_tcon *tcon;
604         struct TCP_Server_Info *server;
605         struct cifsInodeInfo *cinode;
606         struct inode *inode;
607         char *full_path = NULL;
608         int desired_access;
609         int disposition = FILE_OPEN;
610         int create_options = CREATE_NOT_DIR;
611         struct cifs_open_parms oparms;
612
613         xid = get_xid();
614         mutex_lock(&cfile->fh_mutex);
615         if (!cfile->invalidHandle) {
616                 mutex_unlock(&cfile->fh_mutex);
617                 rc = 0;
618                 free_xid(xid);
619                 return rc;
620         }
621
622         inode = d_inode(cfile->dentry);
623         cifs_sb = CIFS_SB(inode->i_sb);
624         tcon = tlink_tcon(cfile->tlink);
625         server = tcon->ses->server;
626
627         /*
628          * Can not grab rename sem here because various ops, including those
629          * that already have the rename sem can end up causing writepage to get
630          * called and if the server was down that means we end up here, and we
631          * can never tell if the caller already has the rename_sem.
632          */
633         full_path = build_path_from_dentry(cfile->dentry);
634         if (full_path == NULL) {
635                 rc = -ENOMEM;
636                 mutex_unlock(&cfile->fh_mutex);
637                 free_xid(xid);
638                 return rc;
639         }
640
641         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
642                  inode, cfile->f_flags, full_path);
643
644         if (tcon->ses->server->oplocks)
645                 oplock = REQ_OPLOCK;
646         else
647                 oplock = 0;
648
649         if (tcon->unix_ext && cap_unix(tcon->ses) &&
650             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
651                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
652                 /*
653                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
654                  * original open. Must mask them off for a reopen.
655                  */
656                 unsigned int oflags = cfile->f_flags &
657                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
658
659                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
660                                      cifs_sb->mnt_file_mode /* ignored */,
661                                      oflags, &oplock, &cfile->fid.netfid, xid);
662                 if (rc == 0) {
663                         cifs_dbg(FYI, "posix reopen succeeded\n");
664                         oparms.reconnect = true;
665                         goto reopen_success;
666                 }
667                 /*
668                  * fallthrough to retry open the old way on errors, especially
669                  * in the reconnect path it is important to retry hard
670                  */
671         }
672
673         desired_access = cifs_convert_flags(cfile->f_flags);
674
675         if (backup_cred(cifs_sb))
676                 create_options |= CREATE_OPEN_BACKUP_INTENT;
677
678         if (server->ops->get_lease_key)
679                 server->ops->get_lease_key(inode, &cfile->fid);
680
681         oparms.tcon = tcon;
682         oparms.cifs_sb = cifs_sb;
683         oparms.desired_access = desired_access;
684         oparms.create_options = create_options;
685         oparms.disposition = disposition;
686         oparms.path = full_path;
687         oparms.fid = &cfile->fid;
688         oparms.reconnect = true;
689
690         /*
691          * Can not refresh inode by passing in file_info buf to be returned by
692          * ops->open and then calling get_inode_info with returned buf since
693          * file might have write behind data that needs to be flushed and server
694          * version of file size can be stale. If we knew for sure that inode was
695          * not dirty locally we could do this.
696          */
697         rc = server->ops->open(xid, &oparms, &oplock, NULL);
698         if (rc == -ENOENT && oparms.reconnect == false) {
699                 /* durable handle timeout is expired - open the file again */
700                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
701                 /* indicate that we need to relock the file */
702                 oparms.reconnect = true;
703         }
704
705         if (rc) {
706                 mutex_unlock(&cfile->fh_mutex);
707                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
708                 cifs_dbg(FYI, "oplock: %d\n", oplock);
709                 goto reopen_error_exit;
710         }
711
712 reopen_success:
713         cfile->invalidHandle = false;
714         mutex_unlock(&cfile->fh_mutex);
715         cinode = CIFS_I(inode);
716
717         if (can_flush) {
718                 rc = filemap_write_and_wait(inode->i_mapping);
719                 mapping_set_error(inode->i_mapping, rc);
720
721                 if (tcon->unix_ext)
722                         rc = cifs_get_inode_info_unix(&inode, full_path,
723                                                       inode->i_sb, xid);
724                 else
725                         rc = cifs_get_inode_info(&inode, full_path, NULL,
726                                                  inode->i_sb, xid, NULL);
727         }
728         /*
729          * Else we are writing out data to server already and could deadlock if
730          * we tried to flush data, and since we do not know if we have data that
731          * would invalidate the current end of file on the server we can not go
732          * to the server to get the new inode info.
733          */
734
735         server->ops->set_fid(cfile, &cfile->fid, oplock);
736         if (oparms.reconnect)
737                 cifs_relock_file(cfile);
738
739 reopen_error_exit:
740         kfree(full_path);
741         free_xid(xid);
742         return rc;
743 }
744
745 int cifs_close(struct inode *inode, struct file *file)
746 {
747         if (file->private_data != NULL) {
748                 cifsFileInfo_put(file->private_data);
749                 file->private_data = NULL;
750         }
751
752         /* return code from the ->release op is always ignored */
753         return 0;
754 }
755
756 int cifs_closedir(struct inode *inode, struct file *file)
757 {
758         int rc = 0;
759         unsigned int xid;
760         struct cifsFileInfo *cfile = file->private_data;
761         struct cifs_tcon *tcon;
762         struct TCP_Server_Info *server;
763         char *buf;
764
765         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
766
767         if (cfile == NULL)
768                 return rc;
769
770         xid = get_xid();
771         tcon = tlink_tcon(cfile->tlink);
772         server = tcon->ses->server;
773
774         cifs_dbg(FYI, "Freeing private data in close dir\n");
775         spin_lock(&cifs_file_list_lock);
776         if (server->ops->dir_needs_close(cfile)) {
777                 cfile->invalidHandle = true;
778                 spin_unlock(&cifs_file_list_lock);
779                 if (server->ops->close_dir)
780                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
781                 else
782                         rc = -ENOSYS;
783                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
784                 /* not much we can do if it fails anyway, ignore rc */
785                 rc = 0;
786         } else
787                 spin_unlock(&cifs_file_list_lock);
788
789         buf = cfile->srch_inf.ntwrk_buf_start;
790         if (buf) {
791                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
792                 cfile->srch_inf.ntwrk_buf_start = NULL;
793                 if (cfile->srch_inf.smallBuf)
794                         cifs_small_buf_release(buf);
795                 else
796                         cifs_buf_release(buf);
797         }
798
799         cifs_put_tlink(cfile->tlink);
800         kfree(file->private_data);
801         file->private_data = NULL;
802         /* BB can we lock the filestruct while this is going on? */
803         free_xid(xid);
804         return rc;
805 }
806
807 static struct cifsLockInfo *
808 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
809 {
810         struct cifsLockInfo *lock =
811                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
812         if (!lock)
813                 return lock;
814         lock->offset = offset;
815         lock->length = length;
816         lock->type = type;
817         lock->pid = current->tgid;
818         INIT_LIST_HEAD(&lock->blist);
819         init_waitqueue_head(&lock->block_q);
820         return lock;
821 }
822
823 void
824 cifs_del_lock_waiters(struct cifsLockInfo *lock)
825 {
826         struct cifsLockInfo *li, *tmp;
827         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
828                 list_del_init(&li->blist);
829                 wake_up(&li->block_q);
830         }
831 }
832
833 #define CIFS_LOCK_OP    0
834 #define CIFS_READ_OP    1
835 #define CIFS_WRITE_OP   2
836
837 /* @rw_check : 0 - no op, 1 - read, 2 - write */
838 static bool
839 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
840                             __u64 length, __u8 type, struct cifsFileInfo *cfile,
841                             struct cifsLockInfo **conf_lock, int rw_check)
842 {
843         struct cifsLockInfo *li;
844         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
845         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
846
847         list_for_each_entry(li, &fdlocks->locks, llist) {
848                 if (offset + length <= li->offset ||
849                     offset >= li->offset + li->length)
850                         continue;
851                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
852                     server->ops->compare_fids(cfile, cur_cfile)) {
853                         /* shared lock prevents write op through the same fid */
854                         if (!(li->type & server->vals->shared_lock_type) ||
855                             rw_check != CIFS_WRITE_OP)
856                                 continue;
857                 }
858                 if ((type & server->vals->shared_lock_type) &&
859                     ((server->ops->compare_fids(cfile, cur_cfile) &&
860                      current->tgid == li->pid) || type == li->type))
861                         continue;
862                 if (conf_lock)
863                         *conf_lock = li;
864                 return true;
865         }
866         return false;
867 }
868
869 bool
870 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
871                         __u8 type, struct cifsLockInfo **conf_lock,
872                         int rw_check)
873 {
874         bool rc = false;
875         struct cifs_fid_locks *cur;
876         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
877
878         list_for_each_entry(cur, &cinode->llist, llist) {
879                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
880                                                  cfile, conf_lock, rw_check);
881                 if (rc)
882                         break;
883         }
884
885         return rc;
886 }
887
888 /*
889  * Check if there is another lock that prevents us to set the lock (mandatory
890  * style). If such a lock exists, update the flock structure with its
891  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
892  * or leave it the same if we can't. Returns 0 if we don't need to request to
893  * the server or 1 otherwise.
894  */
895 static int
896 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
897                __u8 type, struct file_lock *flock)
898 {
899         int rc = 0;
900         struct cifsLockInfo *conf_lock;
901         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
902         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
903         bool exist;
904
905         down_read(&cinode->lock_sem);
906
907         exist = cifs_find_lock_conflict(cfile, offset, length, type,
908                                         &conf_lock, CIFS_LOCK_OP);
909         if (exist) {
910                 flock->fl_start = conf_lock->offset;
911                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
912                 flock->fl_pid = conf_lock->pid;
913                 if (conf_lock->type & server->vals->shared_lock_type)
914                         flock->fl_type = F_RDLCK;
915                 else
916                         flock->fl_type = F_WRLCK;
917         } else if (!cinode->can_cache_brlcks)
918                 rc = 1;
919         else
920                 flock->fl_type = F_UNLCK;
921
922         up_read(&cinode->lock_sem);
923         return rc;
924 }
925
926 static void
927 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
928 {
929         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
930         down_write(&cinode->lock_sem);
931         list_add_tail(&lock->llist, &cfile->llist->locks);
932         up_write(&cinode->lock_sem);
933 }
934
935 /*
936  * Set the byte-range lock (mandatory style). Returns:
937  * 1) 0, if we set the lock and don't need to request to the server;
938  * 2) 1, if no locks prevent us but we need to request to the server;
939  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
940  */
941 static int
942 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
943                  bool wait)
944 {
945         struct cifsLockInfo *conf_lock;
946         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
947         bool exist;
948         int rc = 0;
949
950 try_again:
951         exist = false;
952         down_write(&cinode->lock_sem);
953
954         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
955                                         lock->type, &conf_lock, CIFS_LOCK_OP);
956         if (!exist && cinode->can_cache_brlcks) {
957                 list_add_tail(&lock->llist, &cfile->llist->locks);
958                 up_write(&cinode->lock_sem);
959                 return rc;
960         }
961
962         if (!exist)
963                 rc = 1;
964         else if (!wait)
965                 rc = -EACCES;
966         else {
967                 list_add_tail(&lock->blist, &conf_lock->blist);
968                 up_write(&cinode->lock_sem);
969                 rc = wait_event_interruptible(lock->block_q,
970                                         (lock->blist.prev == &lock->blist) &&
971                                         (lock->blist.next == &lock->blist));
972                 if (!rc)
973                         goto try_again;
974                 down_write(&cinode->lock_sem);
975                 list_del_init(&lock->blist);
976         }
977
978         up_write(&cinode->lock_sem);
979         return rc;
980 }
981
982 /*
983  * Check if there is another lock that prevents us to set the lock (posix
984  * style). If such a lock exists, update the flock structure with its
985  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
986  * or leave it the same if we can't. Returns 0 if we don't need to request to
987  * the server or 1 otherwise.
988  */
989 static int
990 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
991 {
992         int rc = 0;
993         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
994         unsigned char saved_type = flock->fl_type;
995
996         if ((flock->fl_flags & FL_POSIX) == 0)
997                 return 1;
998
999         down_read(&cinode->lock_sem);
1000         posix_test_lock(file, flock);
1001
1002         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1003                 flock->fl_type = saved_type;
1004                 rc = 1;
1005         }
1006
1007         up_read(&cinode->lock_sem);
1008         return rc;
1009 }
1010
1011 /*
1012  * Set the byte-range lock (posix style). Returns:
1013  * 1) 0, if we set the lock and don't need to request to the server;
1014  * 2) 1, if we need to request to the server;
1015  * 3) <0, if the error occurs while setting the lock.
1016  */
1017 static int
1018 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1019 {
1020         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1021         int rc = 1;
1022
1023         if ((flock->fl_flags & FL_POSIX) == 0)
1024                 return rc;
1025
1026 try_again:
1027         down_write(&cinode->lock_sem);
1028         if (!cinode->can_cache_brlcks) {
1029                 up_write(&cinode->lock_sem);
1030                 return rc;
1031         }
1032
1033         rc = posix_lock_file(file, flock, NULL);
1034         up_write(&cinode->lock_sem);
1035         if (rc == FILE_LOCK_DEFERRED) {
1036                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1037                 if (!rc)
1038                         goto try_again;
1039                 posix_unblock_lock(flock);
1040         }
1041         return rc;
1042 }
1043
1044 int
1045 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1046 {
1047         unsigned int xid;
1048         int rc = 0, stored_rc;
1049         struct cifsLockInfo *li, *tmp;
1050         struct cifs_tcon *tcon;
1051         unsigned int num, max_num, max_buf;
1052         LOCKING_ANDX_RANGE *buf, *cur;
1053         int types[] = {LOCKING_ANDX_LARGE_FILES,
1054                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1055         int i;
1056
1057         xid = get_xid();
1058         tcon = tlink_tcon(cfile->tlink);
1059
1060         /*
1061          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1062          * and check it for zero before using.
1063          */
1064         max_buf = tcon->ses->server->maxBuf;
1065         if (!max_buf) {
1066                 free_xid(xid);
1067                 return -EINVAL;
1068         }
1069
1070         max_num = (max_buf - sizeof(struct smb_hdr)) /
1071                                                 sizeof(LOCKING_ANDX_RANGE);
1072         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1073         if (!buf) {
1074                 free_xid(xid);
1075                 return -ENOMEM;
1076         }
1077
1078         for (i = 0; i < 2; i++) {
1079                 cur = buf;
1080                 num = 0;
1081                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1082                         if (li->type != types[i])
1083                                 continue;
1084                         cur->Pid = cpu_to_le16(li->pid);
1085                         cur->LengthLow = cpu_to_le32((u32)li->length);
1086                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1087                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1088                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1089                         if (++num == max_num) {
1090                                 stored_rc = cifs_lockv(xid, tcon,
1091                                                        cfile->fid.netfid,
1092                                                        (__u8)li->type, 0, num,
1093                                                        buf);
1094                                 if (stored_rc)
1095                                         rc = stored_rc;
1096                                 cur = buf;
1097                                 num = 0;
1098                         } else
1099                                 cur++;
1100                 }
1101
1102                 if (num) {
1103                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1104                                                (__u8)types[i], 0, num, buf);
1105                         if (stored_rc)
1106                                 rc = stored_rc;
1107                 }
1108         }
1109
1110         kfree(buf);
1111         free_xid(xid);
1112         return rc;
1113 }
1114
1115 static __u32
1116 hash_lockowner(fl_owner_t owner)
1117 {
1118         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1119 }
1120
1121 struct lock_to_push {
1122         struct list_head llist;
1123         __u64 offset;
1124         __u64 length;
1125         __u32 pid;
1126         __u16 netfid;
1127         __u8 type;
1128 };
1129
1130 static int
1131 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1132 {
1133         struct inode *inode = d_inode(cfile->dentry);
1134         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1135         struct file_lock *flock;
1136         struct file_lock_context *flctx = inode->i_flctx;
1137         unsigned int count = 0, i;
1138         int rc = 0, xid, type;
1139         struct list_head locks_to_send, *el;
1140         struct lock_to_push *lck, *tmp;
1141         __u64 length;
1142
1143         xid = get_xid();
1144
1145         if (!flctx)
1146                 goto out;
1147
1148         spin_lock(&flctx->flc_lock);
1149         list_for_each(el, &flctx->flc_posix) {
1150                 count++;
1151         }
1152         spin_unlock(&flctx->flc_lock);
1153
1154         INIT_LIST_HEAD(&locks_to_send);
1155
1156         /*
1157          * Allocating count locks is enough because no FL_POSIX locks can be
1158          * added to the list while we are holding cinode->lock_sem that
1159          * protects locking operations of this inode.
1160          */
1161         for (i = 0; i < count; i++) {
1162                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1163                 if (!lck) {
1164                         rc = -ENOMEM;
1165                         goto err_out;
1166                 }
1167                 list_add_tail(&lck->llist, &locks_to_send);
1168         }
1169
1170         el = locks_to_send.next;
1171         spin_lock(&flctx->flc_lock);
1172         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1173                 if (el == &locks_to_send) {
1174                         /*
1175                          * The list ended. We don't have enough allocated
1176                          * structures - something is really wrong.
1177                          */
1178                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1179                         break;
1180                 }
1181                 length = 1 + flock->fl_end - flock->fl_start;
1182                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1183                         type = CIFS_RDLCK;
1184                 else
1185                         type = CIFS_WRLCK;
1186                 lck = list_entry(el, struct lock_to_push, llist);
1187                 lck->pid = hash_lockowner(flock->fl_owner);
1188                 lck->netfid = cfile->fid.netfid;
1189                 lck->length = length;
1190                 lck->type = type;
1191                 lck->offset = flock->fl_start;
1192         }
1193         spin_unlock(&flctx->flc_lock);
1194
1195         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1196                 int stored_rc;
1197
1198                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1199                                              lck->offset, lck->length, NULL,
1200                                              lck->type, 0);
1201                 if (stored_rc)
1202                         rc = stored_rc;
1203                 list_del(&lck->llist);
1204                 kfree(lck);
1205         }
1206
1207 out:
1208         free_xid(xid);
1209         return rc;
1210 err_out:
1211         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1212                 list_del(&lck->llist);
1213                 kfree(lck);
1214         }
1215         goto out;
1216 }
1217
1218 static int
1219 cifs_push_locks(struct cifsFileInfo *cfile)
1220 {
1221         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1222         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1223         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1224         int rc = 0;
1225
1226         /* we are going to update can_cache_brlcks here - need a write access */
1227         down_write(&cinode->lock_sem);
1228         if (!cinode->can_cache_brlcks) {
1229                 up_write(&cinode->lock_sem);
1230                 return rc;
1231         }
1232
1233         if (cap_unix(tcon->ses) &&
1234             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1235             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1236                 rc = cifs_push_posix_locks(cfile);
1237         else
1238                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1239
1240         cinode->can_cache_brlcks = false;
1241         up_write(&cinode->lock_sem);
1242         return rc;
1243 }
1244
1245 static void
1246 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1247                 bool *wait_flag, struct TCP_Server_Info *server)
1248 {
1249         if (flock->fl_flags & FL_POSIX)
1250                 cifs_dbg(FYI, "Posix\n");
1251         if (flock->fl_flags & FL_FLOCK)
1252                 cifs_dbg(FYI, "Flock\n");
1253         if (flock->fl_flags & FL_SLEEP) {
1254                 cifs_dbg(FYI, "Blocking lock\n");
1255                 *wait_flag = true;
1256         }
1257         if (flock->fl_flags & FL_ACCESS)
1258                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1259         if (flock->fl_flags & FL_LEASE)
1260                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1261         if (flock->fl_flags &
1262             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1263                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1264                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1265
1266         *type = server->vals->large_lock_type;
1267         if (flock->fl_type == F_WRLCK) {
1268                 cifs_dbg(FYI, "F_WRLCK\n");
1269                 *type |= server->vals->exclusive_lock_type;
1270                 *lock = 1;
1271         } else if (flock->fl_type == F_UNLCK) {
1272                 cifs_dbg(FYI, "F_UNLCK\n");
1273                 *type |= server->vals->unlock_lock_type;
1274                 *unlock = 1;
1275                 /* Check if unlock includes more than one lock range */
1276         } else if (flock->fl_type == F_RDLCK) {
1277                 cifs_dbg(FYI, "F_RDLCK\n");
1278                 *type |= server->vals->shared_lock_type;
1279                 *lock = 1;
1280         } else if (flock->fl_type == F_EXLCK) {
1281                 cifs_dbg(FYI, "F_EXLCK\n");
1282                 *type |= server->vals->exclusive_lock_type;
1283                 *lock = 1;
1284         } else if (flock->fl_type == F_SHLCK) {
1285                 cifs_dbg(FYI, "F_SHLCK\n");
1286                 *type |= server->vals->shared_lock_type;
1287                 *lock = 1;
1288         } else
1289                 cifs_dbg(FYI, "Unknown type of lock\n");
1290 }
1291
1292 static int
1293 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1294            bool wait_flag, bool posix_lck, unsigned int xid)
1295 {
1296         int rc = 0;
1297         __u64 length = 1 + flock->fl_end - flock->fl_start;
1298         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1299         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1300         struct TCP_Server_Info *server = tcon->ses->server;
1301         __u16 netfid = cfile->fid.netfid;
1302
1303         if (posix_lck) {
1304                 int posix_lock_type;
1305
1306                 rc = cifs_posix_lock_test(file, flock);
1307                 if (!rc)
1308                         return rc;
1309
1310                 if (type & server->vals->shared_lock_type)
1311                         posix_lock_type = CIFS_RDLCK;
1312                 else
1313                         posix_lock_type = CIFS_WRLCK;
1314                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1315                                       hash_lockowner(flock->fl_owner),
1316                                       flock->fl_start, length, flock,
1317                                       posix_lock_type, wait_flag);
1318                 return rc;
1319         }
1320
1321         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1322         if (!rc)
1323                 return rc;
1324
1325         /* BB we could chain these into one lock request BB */
1326         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1327                                     1, 0, false);
1328         if (rc == 0) {
1329                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1330                                             type, 0, 1, false);
1331                 flock->fl_type = F_UNLCK;
1332                 if (rc != 0)
1333                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1334                                  rc);
1335                 return 0;
1336         }
1337
1338         if (type & server->vals->shared_lock_type) {
1339                 flock->fl_type = F_WRLCK;
1340                 return 0;
1341         }
1342
1343         type &= ~server->vals->exclusive_lock_type;
1344
1345         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1346                                     type | server->vals->shared_lock_type,
1347                                     1, 0, false);
1348         if (rc == 0) {
1349                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1350                         type | server->vals->shared_lock_type, 0, 1, false);
1351                 flock->fl_type = F_RDLCK;
1352                 if (rc != 0)
1353                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1354                                  rc);
1355         } else
1356                 flock->fl_type = F_WRLCK;
1357
1358         return 0;
1359 }
1360
1361 void
1362 cifs_move_llist(struct list_head *source, struct list_head *dest)
1363 {
1364         struct list_head *li, *tmp;
1365         list_for_each_safe(li, tmp, source)
1366                 list_move(li, dest);
1367 }
1368
1369 void
1370 cifs_free_llist(struct list_head *llist)
1371 {
1372         struct cifsLockInfo *li, *tmp;
1373         list_for_each_entry_safe(li, tmp, llist, llist) {
1374                 cifs_del_lock_waiters(li);
1375                 list_del(&li->llist);
1376                 kfree(li);
1377         }
1378 }
1379
1380 int
1381 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1382                   unsigned int xid)
1383 {
1384         int rc = 0, stored_rc;
1385         int types[] = {LOCKING_ANDX_LARGE_FILES,
1386                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1387         unsigned int i;
1388         unsigned int max_num, num, max_buf;
1389         LOCKING_ANDX_RANGE *buf, *cur;
1390         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1391         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1392         struct cifsLockInfo *li, *tmp;
1393         __u64 length = 1 + flock->fl_end - flock->fl_start;
1394         struct list_head tmp_llist;
1395
1396         INIT_LIST_HEAD(&tmp_llist);
1397
1398         /*
1399          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1400          * and check it for zero before using.
1401          */
1402         max_buf = tcon->ses->server->maxBuf;
1403         if (!max_buf)
1404                 return -EINVAL;
1405
1406         max_num = (max_buf - sizeof(struct smb_hdr)) /
1407                                                 sizeof(LOCKING_ANDX_RANGE);
1408         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1409         if (!buf)
1410                 return -ENOMEM;
1411
1412         down_write(&cinode->lock_sem);
1413         for (i = 0; i < 2; i++) {
1414                 cur = buf;
1415                 num = 0;
1416                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1417                         if (flock->fl_start > li->offset ||
1418                             (flock->fl_start + length) <
1419                             (li->offset + li->length))
1420                                 continue;
1421                         if (current->tgid != li->pid)
1422                                 continue;
1423                         if (types[i] != li->type)
1424                                 continue;
1425                         if (cinode->can_cache_brlcks) {
1426                                 /*
1427                                  * We can cache brlock requests - simply remove
1428                                  * a lock from the file's list.
1429                                  */
1430                                 list_del(&li->llist);
1431                                 cifs_del_lock_waiters(li);
1432                                 kfree(li);
1433                                 continue;
1434                         }
1435                         cur->Pid = cpu_to_le16(li->pid);
1436                         cur->LengthLow = cpu_to_le32((u32)li->length);
1437                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1438                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1439                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1440                         /*
1441                          * We need to save a lock here to let us add it again to
1442                          * the file's list if the unlock range request fails on
1443                          * the server.
1444                          */
1445                         list_move(&li->llist, &tmp_llist);
1446                         if (++num == max_num) {
1447                                 stored_rc = cifs_lockv(xid, tcon,
1448                                                        cfile->fid.netfid,
1449                                                        li->type, num, 0, buf);
1450                                 if (stored_rc) {
1451                                         /*
1452                                          * We failed on the unlock range
1453                                          * request - add all locks from the tmp
1454                                          * list to the head of the file's list.
1455                                          */
1456                                         cifs_move_llist(&tmp_llist,
1457                                                         &cfile->llist->locks);
1458                                         rc = stored_rc;
1459                                 } else
1460                                         /*
1461                                          * The unlock range request succeed -
1462                                          * free the tmp list.
1463                                          */
1464                                         cifs_free_llist(&tmp_llist);
1465                                 cur = buf;
1466                                 num = 0;
1467                         } else
1468                                 cur++;
1469                 }
1470                 if (num) {
1471                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1472                                                types[i], num, 0, buf);
1473                         if (stored_rc) {
1474                                 cifs_move_llist(&tmp_llist,
1475                                                 &cfile->llist->locks);
1476                                 rc = stored_rc;
1477                         } else
1478                                 cifs_free_llist(&tmp_llist);
1479                 }
1480         }
1481
1482         up_write(&cinode->lock_sem);
1483         kfree(buf);
1484         return rc;
1485 }
1486
1487 static int
1488 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1489            bool wait_flag, bool posix_lck, int lock, int unlock,
1490            unsigned int xid)
1491 {
1492         int rc = 0;
1493         __u64 length = 1 + flock->fl_end - flock->fl_start;
1494         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1495         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1496         struct TCP_Server_Info *server = tcon->ses->server;
1497         struct inode *inode = d_inode(cfile->dentry);
1498
1499         if (posix_lck) {
1500                 int posix_lock_type;
1501
1502                 rc = cifs_posix_lock_set(file, flock);
1503                 if (!rc || rc < 0)
1504                         return rc;
1505
1506                 if (type & server->vals->shared_lock_type)
1507                         posix_lock_type = CIFS_RDLCK;
1508                 else
1509                         posix_lock_type = CIFS_WRLCK;
1510
1511                 if (unlock == 1)
1512                         posix_lock_type = CIFS_UNLCK;
1513
1514                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1515                                       hash_lockowner(flock->fl_owner),
1516                                       flock->fl_start, length,
1517                                       NULL, posix_lock_type, wait_flag);
1518                 goto out;
1519         }
1520
1521         if (lock) {
1522                 struct cifsLockInfo *lock;
1523
1524                 lock = cifs_lock_init(flock->fl_start, length, type);
1525                 if (!lock)
1526                         return -ENOMEM;
1527
1528                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1529                 if (rc < 0) {
1530                         kfree(lock);
1531                         return rc;
1532                 }
1533                 if (!rc)
1534                         goto out;
1535
1536                 /*
1537                  * Windows 7 server can delay breaking lease from read to None
1538                  * if we set a byte-range lock on a file - break it explicitly
1539                  * before sending the lock to the server to be sure the next
1540                  * read won't conflict with non-overlapted locks due to
1541                  * pagereading.
1542                  */
1543                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1544                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1545                         cifs_zap_mapping(inode);
1546                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1547                                  inode);
1548                         CIFS_I(inode)->oplock = 0;
1549                 }
1550
1551                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1552                                             type, 1, 0, wait_flag);
1553                 if (rc) {
1554                         kfree(lock);
1555                         return rc;
1556                 }
1557
1558                 cifs_lock_add(cfile, lock);
1559         } else if (unlock)
1560                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1561
1562 out:
1563         if (flock->fl_flags & FL_POSIX && !rc)
1564                 rc = locks_lock_file_wait(file, flock);
1565         return rc;
1566 }
1567
1568 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1569 {
1570         int rc, xid;
1571         int lock = 0, unlock = 0;
1572         bool wait_flag = false;
1573         bool posix_lck = false;
1574         struct cifs_sb_info *cifs_sb;
1575         struct cifs_tcon *tcon;
1576         struct cifsInodeInfo *cinode;
1577         struct cifsFileInfo *cfile;
1578         __u16 netfid;
1579         __u32 type;
1580
1581         rc = -EACCES;
1582         xid = get_xid();
1583
1584         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1585                  cmd, flock->fl_flags, flock->fl_type,
1586                  flock->fl_start, flock->fl_end);
1587
1588         cfile = (struct cifsFileInfo *)file->private_data;
1589         tcon = tlink_tcon(cfile->tlink);
1590
1591         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1592                         tcon->ses->server);
1593
1594         cifs_sb = CIFS_FILE_SB(file);
1595         netfid = cfile->fid.netfid;
1596         cinode = CIFS_I(file_inode(file));
1597
1598         if (cap_unix(tcon->ses) &&
1599             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1600             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1601                 posix_lck = true;
1602         /*
1603          * BB add code here to normalize offset and length to account for
1604          * negative length which we can not accept over the wire.
1605          */
1606         if (IS_GETLK(cmd)) {
1607                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1608                 free_xid(xid);
1609                 return rc;
1610         }
1611
1612         if (!lock && !unlock) {
1613                 /*
1614                  * if no lock or unlock then nothing to do since we do not
1615                  * know what it is
1616                  */
1617                 free_xid(xid);
1618                 return -EOPNOTSUPP;
1619         }
1620
1621         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1622                         xid);
1623         free_xid(xid);
1624         return rc;
1625 }
1626
1627 /*
1628  * update the file size (if needed) after a write. Should be called with
1629  * the inode->i_lock held
1630  */
1631 void
1632 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1633                       unsigned int bytes_written)
1634 {
1635         loff_t end_of_write = offset + bytes_written;
1636
1637         if (end_of_write > cifsi->server_eof)
1638                 cifsi->server_eof = end_of_write;
1639 }
1640
1641 static ssize_t
1642 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1643            size_t write_size, loff_t *offset)
1644 {
1645         int rc = 0;
1646         unsigned int bytes_written = 0;
1647         unsigned int total_written;
1648         struct cifs_sb_info *cifs_sb;
1649         struct cifs_tcon *tcon;
1650         struct TCP_Server_Info *server;
1651         unsigned int xid;
1652         struct dentry *dentry = open_file->dentry;
1653         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1654         struct cifs_io_parms io_parms;
1655
1656         cifs_sb = CIFS_SB(dentry->d_sb);
1657
1658         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1659                  write_size, *offset, dentry);
1660
1661         tcon = tlink_tcon(open_file->tlink);
1662         server = tcon->ses->server;
1663
1664         if (!server->ops->sync_write)
1665                 return -ENOSYS;
1666
1667         xid = get_xid();
1668
1669         for (total_written = 0; write_size > total_written;
1670              total_written += bytes_written) {
1671                 rc = -EAGAIN;
1672                 while (rc == -EAGAIN) {
1673                         struct kvec iov[2];
1674                         unsigned int len;
1675
1676                         if (open_file->invalidHandle) {
1677                                 /* we could deadlock if we called
1678                                    filemap_fdatawait from here so tell
1679                                    reopen_file not to flush data to
1680                                    server now */
1681                                 rc = cifs_reopen_file(open_file, false);
1682                                 if (rc != 0)
1683                                         break;
1684                         }
1685
1686                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1687                                   (unsigned int)write_size - total_written);
1688                         /* iov[0] is reserved for smb header */
1689                         iov[1].iov_base = (char *)write_data + total_written;
1690                         iov[1].iov_len = len;
1691                         io_parms.pid = pid;
1692                         io_parms.tcon = tcon;
1693                         io_parms.offset = *offset;
1694                         io_parms.length = len;
1695                         rc = server->ops->sync_write(xid, &open_file->fid,
1696                                         &io_parms, &bytes_written, iov, 1);
1697                 }
1698                 if (rc || (bytes_written == 0)) {
1699                         if (total_written)
1700                                 break;
1701                         else {
1702                                 free_xid(xid);
1703                                 return rc;
1704                         }
1705                 } else {
1706                         spin_lock(&d_inode(dentry)->i_lock);
1707                         cifs_update_eof(cifsi, *offset, bytes_written);
1708                         spin_unlock(&d_inode(dentry)->i_lock);
1709                         *offset += bytes_written;
1710                 }
1711         }
1712
1713         cifs_stats_bytes_written(tcon, total_written);
1714
1715         if (total_written > 0) {
1716                 spin_lock(&d_inode(dentry)->i_lock);
1717                 if (*offset > d_inode(dentry)->i_size)
1718                         i_size_write(d_inode(dentry), *offset);
1719                 spin_unlock(&d_inode(dentry)->i_lock);
1720         }
1721         mark_inode_dirty_sync(d_inode(dentry));
1722         free_xid(xid);
1723         return total_written;
1724 }
1725
1726 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1727                                         bool fsuid_only)
1728 {
1729         struct cifsFileInfo *open_file = NULL;
1730         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1731
1732         /* only filter by fsuid on multiuser mounts */
1733         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1734                 fsuid_only = false;
1735
1736         spin_lock(&cifs_file_list_lock);
1737         /* we could simply get the first_list_entry since write-only entries
1738            are always at the end of the list but since the first entry might
1739            have a close pending, we go through the whole list */
1740         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1741                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1742                         continue;
1743                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1744                         if (!open_file->invalidHandle) {
1745                                 /* found a good file */
1746                                 /* lock it so it will not be closed on us */
1747                                 cifsFileInfo_get_locked(open_file);
1748                                 spin_unlock(&cifs_file_list_lock);
1749                                 return open_file;
1750                         } /* else might as well continue, and look for
1751                              another, or simply have the caller reopen it
1752                              again rather than trying to fix this handle */
1753                 } else /* write only file */
1754                         break; /* write only files are last so must be done */
1755         }
1756         spin_unlock(&cifs_file_list_lock);
1757         return NULL;
1758 }
1759
1760 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1761                                         bool fsuid_only)
1762 {
1763         struct cifsFileInfo *open_file, *inv_file = NULL;
1764         struct cifs_sb_info *cifs_sb;
1765         bool any_available = false;
1766         int rc;
1767         unsigned int refind = 0;
1768
1769         /* Having a null inode here (because mapping->host was set to zero by
1770         the VFS or MM) should not happen but we had reports of on oops (due to
1771         it being zero) during stress testcases so we need to check for it */
1772
1773         if (cifs_inode == NULL) {
1774                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1775                 dump_stack();
1776                 return NULL;
1777         }
1778
1779         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1780
1781         /* only filter by fsuid on multiuser mounts */
1782         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1783                 fsuid_only = false;
1784
1785         spin_lock(&cifs_file_list_lock);
1786 refind_writable:
1787         if (refind > MAX_REOPEN_ATT) {
1788                 spin_unlock(&cifs_file_list_lock);
1789                 return NULL;
1790         }
1791         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1792                 if (!any_available && open_file->pid != current->tgid)
1793                         continue;
1794                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1795                         continue;
1796                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1797                         if (!open_file->invalidHandle) {
1798                                 /* found a good writable file */
1799                                 cifsFileInfo_get_locked(open_file);
1800                                 spin_unlock(&cifs_file_list_lock);
1801                                 return open_file;
1802                         } else {
1803                                 if (!inv_file)
1804                                         inv_file = open_file;
1805                         }
1806                 }
1807         }
1808         /* couldn't find useable FH with same pid, try any available */
1809         if (!any_available) {
1810                 any_available = true;
1811                 goto refind_writable;
1812         }
1813
1814         if (inv_file) {
1815                 any_available = false;
1816                 cifsFileInfo_get_locked(inv_file);
1817         }
1818
1819         spin_unlock(&cifs_file_list_lock);
1820
1821         if (inv_file) {
1822                 rc = cifs_reopen_file(inv_file, false);
1823                 if (!rc)
1824                         return inv_file;
1825                 else {
1826                         spin_lock(&cifs_file_list_lock);
1827                         list_move_tail(&inv_file->flist,
1828                                         &cifs_inode->openFileList);
1829                         spin_unlock(&cifs_file_list_lock);
1830                         cifsFileInfo_put(inv_file);
1831                         spin_lock(&cifs_file_list_lock);
1832                         ++refind;
1833                         inv_file = NULL;
1834                         goto refind_writable;
1835                 }
1836         }
1837
1838         return NULL;
1839 }
1840
1841 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1842 {
1843         struct address_space *mapping = page->mapping;
1844         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1845         char *write_data;
1846         int rc = -EFAULT;
1847         int bytes_written = 0;
1848         struct inode *inode;
1849         struct cifsFileInfo *open_file;
1850
1851         if (!mapping || !mapping->host)
1852                 return -EFAULT;
1853
1854         inode = page->mapping->host;
1855
1856         offset += (loff_t)from;
1857         write_data = kmap(page);
1858         write_data += from;
1859
1860         if ((to > PAGE_SIZE) || (from > to)) {
1861                 kunmap(page);
1862                 return -EIO;
1863         }
1864
1865         /* racing with truncate? */
1866         if (offset > mapping->host->i_size) {
1867                 kunmap(page);
1868                 return 0; /* don't care */
1869         }
1870
1871         /* check to make sure that we are not extending the file */
1872         if (mapping->host->i_size - offset < (loff_t)to)
1873                 to = (unsigned)(mapping->host->i_size - offset);
1874
1875         open_file = find_writable_file(CIFS_I(mapping->host), false);
1876         if (open_file) {
1877                 bytes_written = cifs_write(open_file, open_file->pid,
1878                                            write_data, to - from, &offset);
1879                 cifsFileInfo_put(open_file);
1880                 /* Does mm or vfs already set times? */
1881                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1882                 if ((bytes_written > 0) && (offset))
1883                         rc = 0;
1884                 else if (bytes_written < 0)
1885                         rc = bytes_written;
1886         } else {
1887                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1888                 rc = -EIO;
1889         }
1890
1891         kunmap(page);
1892         return rc;
1893 }
1894
1895 static struct cifs_writedata *
1896 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1897                           pgoff_t end, pgoff_t *index,
1898                           unsigned int *found_pages)
1899 {
1900         unsigned int nr_pages;
1901         struct page **pages;
1902         struct cifs_writedata *wdata;
1903
1904         wdata = cifs_writedata_alloc((unsigned int)tofind,
1905                                      cifs_writev_complete);
1906         if (!wdata)
1907                 return NULL;
1908
1909         /*
1910          * find_get_pages_tag seems to return a max of 256 on each
1911          * iteration, so we must call it several times in order to
1912          * fill the array or the wsize is effectively limited to
1913          * 256 * PAGE_SIZE.
1914          */
1915         *found_pages = 0;
1916         pages = wdata->pages;
1917         do {
1918                 nr_pages = find_get_pages_tag(mapping, index,
1919                                               PAGECACHE_TAG_DIRTY, tofind,
1920                                               pages);
1921                 *found_pages += nr_pages;
1922                 tofind -= nr_pages;
1923                 pages += nr_pages;
1924         } while (nr_pages && tofind && *index <= end);
1925
1926         return wdata;
1927 }
1928
1929 static unsigned int
1930 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1931                     struct address_space *mapping,
1932                     struct writeback_control *wbc,
1933                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1934 {
1935         unsigned int nr_pages = 0, i;
1936         struct page *page;
1937
1938         for (i = 0; i < found_pages; i++) {
1939                 page = wdata->pages[i];
1940                 /*
1941                  * At this point we hold neither mapping->tree_lock nor
1942                  * lock on the page itself: the page may be truncated or
1943                  * invalidated (changing page->mapping to NULL), or even
1944                  * swizzled back from swapper_space to tmpfs file
1945                  * mapping
1946                  */
1947
1948                 if (nr_pages == 0)
1949                         lock_page(page);
1950                 else if (!trylock_page(page))
1951                         break;
1952
1953                 if (unlikely(page->mapping != mapping)) {
1954                         unlock_page(page);
1955                         break;
1956                 }
1957
1958                 if (!wbc->range_cyclic && page->index > end) {
1959                         *done = true;
1960                         unlock_page(page);
1961                         break;
1962                 }
1963
1964                 if (*next && (page->index != *next)) {
1965                         /* Not next consecutive page */
1966                         unlock_page(page);
1967                         break;
1968                 }
1969
1970                 if (wbc->sync_mode != WB_SYNC_NONE)
1971                         wait_on_page_writeback(page);
1972
1973                 if (PageWriteback(page) ||
1974                                 !clear_page_dirty_for_io(page)) {
1975                         unlock_page(page);
1976                         break;
1977                 }
1978
1979                 /*
1980                  * This actually clears the dirty bit in the radix tree.
1981                  * See cifs_writepage() for more commentary.
1982                  */
1983                 set_page_writeback(page);
1984                 if (page_offset(page) >= i_size_read(mapping->host)) {
1985                         *done = true;
1986                         unlock_page(page);
1987                         end_page_writeback(page);
1988                         break;
1989                 }
1990
1991                 wdata->pages[i] = page;
1992                 *next = page->index + 1;
1993                 ++nr_pages;
1994         }
1995
1996         /* reset index to refind any pages skipped */
1997         if (nr_pages == 0)
1998                 *index = wdata->pages[0]->index + 1;
1999
2000         /* put any pages we aren't going to use */
2001         for (i = nr_pages; i < found_pages; i++) {
2002                 put_page(wdata->pages[i]);
2003                 wdata->pages[i] = NULL;
2004         }
2005
2006         return nr_pages;
2007 }
2008
2009 static int
2010 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2011                  struct address_space *mapping, struct writeback_control *wbc)
2012 {
2013         int rc = 0;
2014         struct TCP_Server_Info *server;
2015         unsigned int i;
2016
2017         wdata->sync_mode = wbc->sync_mode;
2018         wdata->nr_pages = nr_pages;
2019         wdata->offset = page_offset(wdata->pages[0]);
2020         wdata->pagesz = PAGE_SIZE;
2021         wdata->tailsz = min(i_size_read(mapping->host) -
2022                         page_offset(wdata->pages[nr_pages - 1]),
2023                         (loff_t)PAGE_SIZE);
2024         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2025
2026         if (wdata->cfile != NULL)
2027                 cifsFileInfo_put(wdata->cfile);
2028         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2029         if (!wdata->cfile) {
2030                 cifs_dbg(VFS, "No writable handles for inode\n");
2031                 rc = -EBADF;
2032         } else {
2033                 wdata->pid = wdata->cfile->pid;
2034                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2035                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2036         }
2037
2038         for (i = 0; i < nr_pages; ++i)
2039                 unlock_page(wdata->pages[i]);
2040
2041         return rc;
2042 }
2043
2044 static int cifs_writepages(struct address_space *mapping,
2045                            struct writeback_control *wbc)
2046 {
2047         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2048         struct TCP_Server_Info *server;
2049         bool done = false, scanned = false, range_whole = false;
2050         pgoff_t end, index;
2051         struct cifs_writedata *wdata;
2052         int rc = 0;
2053
2054         /*
2055          * If wsize is smaller than the page cache size, default to writing
2056          * one page at a time via cifs_writepage
2057          */
2058         if (cifs_sb->wsize < PAGE_SIZE)
2059                 return generic_writepages(mapping, wbc);
2060
2061         if (wbc->range_cyclic) {
2062                 index = mapping->writeback_index; /* Start from prev offset */
2063                 end = -1;
2064         } else {
2065                 index = wbc->range_start >> PAGE_SHIFT;
2066                 end = wbc->range_end >> PAGE_SHIFT;
2067                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2068                         range_whole = true;
2069                 scanned = true;
2070         }
2071         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2072 retry:
2073         while (!done && index <= end) {
2074                 unsigned int i, nr_pages, found_pages, wsize, credits;
2075                 pgoff_t next = 0, tofind, saved_index = index;
2076
2077                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2078                                                    &wsize, &credits);
2079                 if (rc)
2080                         break;
2081
2082                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2083
2084                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2085                                                   &found_pages);
2086                 if (!wdata) {
2087                         rc = -ENOMEM;
2088                         add_credits_and_wake_if(server, credits, 0);
2089                         break;
2090                 }
2091
2092                 if (found_pages == 0) {
2093                         kref_put(&wdata->refcount, cifs_writedata_release);
2094                         add_credits_and_wake_if(server, credits, 0);
2095                         break;
2096                 }
2097
2098                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2099                                                end, &index, &next, &done);
2100
2101                 /* nothing to write? */
2102                 if (nr_pages == 0) {
2103                         kref_put(&wdata->refcount, cifs_writedata_release);
2104                         add_credits_and_wake_if(server, credits, 0);
2105                         continue;
2106                 }
2107
2108                 wdata->credits = credits;
2109
2110                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2111
2112                 /* send failure -- clean up the mess */
2113                 if (rc != 0) {
2114                         add_credits_and_wake_if(server, wdata->credits, 0);
2115                         for (i = 0; i < nr_pages; ++i) {
2116                                 if (rc == -EAGAIN)
2117                                         redirty_page_for_writepage(wbc,
2118                                                            wdata->pages[i]);
2119                                 else
2120                                         SetPageError(wdata->pages[i]);
2121                                 end_page_writeback(wdata->pages[i]);
2122                                 put_page(wdata->pages[i]);
2123                         }
2124                         if (rc != -EAGAIN)
2125                                 mapping_set_error(mapping, rc);
2126                 }
2127                 kref_put(&wdata->refcount, cifs_writedata_release);
2128
2129                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2130                         index = saved_index;
2131                         continue;
2132                 }
2133
2134                 wbc->nr_to_write -= nr_pages;
2135                 if (wbc->nr_to_write <= 0)
2136                         done = true;
2137
2138                 index = next;
2139         }
2140
2141         if (!scanned && !done) {
2142                 /*
2143                  * We hit the last page and there is more work to be done: wrap
2144                  * back to the start of the file
2145                  */
2146                 scanned = true;
2147                 index = 0;
2148                 goto retry;
2149         }
2150
2151         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2152                 mapping->writeback_index = index;
2153
2154         return rc;
2155 }
2156
2157 static int
2158 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2159 {
2160         int rc;
2161         unsigned int xid;
2162
2163         xid = get_xid();
2164 /* BB add check for wbc flags */
2165         get_page(page);
2166         if (!PageUptodate(page))
2167                 cifs_dbg(FYI, "ppw - page not up to date\n");
2168
2169         /*
2170          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2171          *
2172          * A writepage() implementation always needs to do either this,
2173          * or re-dirty the page with "redirty_page_for_writepage()" in
2174          * the case of a failure.
2175          *
2176          * Just unlocking the page will cause the radix tree tag-bits
2177          * to fail to update with the state of the page correctly.
2178          */
2179         set_page_writeback(page);
2180 retry_write:
2181         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2182         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2183                 goto retry_write;
2184         else if (rc == -EAGAIN)
2185                 redirty_page_for_writepage(wbc, page);
2186         else if (rc != 0)
2187                 SetPageError(page);
2188         else
2189                 SetPageUptodate(page);
2190         end_page_writeback(page);
2191         put_page(page);
2192         free_xid(xid);
2193         return rc;
2194 }
2195
/*
 * Write @page via cifs_writepage_locked() and then unlock it, returning
 * the write status.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int rc;

        rc = cifs_writepage_locked(page, wbc);
        unlock_page(page);

        return rc;
}
2202
2203 static int cifs_write_end(struct file *file, struct address_space *mapping,
2204                         loff_t pos, unsigned len, unsigned copied,
2205                         struct page *page, void *fsdata)
2206 {
2207         int rc;
2208         struct inode *inode = mapping->host;
2209         struct cifsFileInfo *cfile = file->private_data;
2210         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2211         __u32 pid;
2212
2213         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2214                 pid = cfile->pid;
2215         else
2216                 pid = current->tgid;
2217
2218         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2219                  page, pos, copied);
2220
2221         if (PageChecked(page)) {
2222                 if (copied == len)
2223                         SetPageUptodate(page);
2224                 ClearPageChecked(page);
2225         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2226                 SetPageUptodate(page);
2227
2228         if (!PageUptodate(page)) {
2229                 char *page_data;
2230                 unsigned offset = pos & (PAGE_SIZE - 1);
2231                 unsigned int xid;
2232
2233                 xid = get_xid();
2234                 /* this is probably better than directly calling
2235                    partialpage_write since in this function the file handle is
2236                    known which we might as well leverage */
2237                 /* BB check if anything else missing out of ppw
2238                    such as updating last write time */
2239                 page_data = kmap(page);
2240                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2241                 /* if (rc < 0) should we set writebehind rc? */
2242                 kunmap(page);
2243
2244                 free_xid(xid);
2245         } else {
2246                 rc = copied;
2247                 pos += copied;
2248                 set_page_dirty(page);
2249         }
2250
2251         if (rc > 0) {
2252                 spin_lock(&inode->i_lock);
2253                 if (pos > inode->i_size)
2254                         i_size_write(inode, pos);
2255                 spin_unlock(&inode->i_lock);
2256         }
2257
2258         unlock_page(page);
2259         put_page(page);
2260
2261         return rc;
2262 }
2263
2264 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2265                       int datasync)
2266 {
2267         unsigned int xid;
2268         int rc = 0;
2269         struct cifs_tcon *tcon;
2270         struct TCP_Server_Info *server;
2271         struct cifsFileInfo *smbfile = file->private_data;
2272         struct inode *inode = file_inode(file);
2273         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2274
2275         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2276         if (rc)
2277                 return rc;
2278         inode_lock(inode);
2279
2280         xid = get_xid();
2281
2282         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2283                  file, datasync);
2284
2285         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2286                 rc = cifs_zap_mapping(inode);
2287                 if (rc) {
2288                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2289                         rc = 0; /* don't care about it in fsync */
2290                 }
2291         }
2292
2293         tcon = tlink_tcon(smbfile->tlink);
2294         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2295                 server = tcon->ses->server;
2296                 if (server->ops->flush)
2297                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2298                 else
2299                         rc = -ENOSYS;
2300         }
2301
2302         free_xid(xid);
2303         inode_unlock(inode);
2304         return rc;
2305 }
2306
2307 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2308 {
2309         unsigned int xid;
2310         int rc = 0;
2311         struct cifs_tcon *tcon;
2312         struct TCP_Server_Info *server;
2313         struct cifsFileInfo *smbfile = file->private_data;
2314         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2315         struct inode *inode = file->f_mapping->host;
2316
2317         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2318         if (rc)
2319                 return rc;
2320         inode_lock(inode);
2321
2322         xid = get_xid();
2323
2324         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2325                  file, datasync);
2326
2327         tcon = tlink_tcon(smbfile->tlink);
2328         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2329                 server = tcon->ses->server;
2330                 if (server->ops->flush)
2331                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2332                 else
2333                         rc = -ENOSYS;
2334         }
2335
2336         free_xid(xid);
2337         inode_unlock(inode);
2338         return rc;
2339 }
2340
2341 /*
2342  * As file closes, flush all cached write data for this inode checking
2343  * for write behind errors.
2344  */
2345 int cifs_flush(struct file *file, fl_owner_t id)
2346 {
2347         struct inode *inode = file_inode(file);
2348         int rc = 0;
2349
2350         if (file->f_mode & FMODE_WRITE)
2351                 rc = filemap_write_and_wait(inode->i_mapping);
2352
2353         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2354
2355         return rc;
2356 }
2357
2358 static int
2359 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2360 {
2361         int rc = 0;
2362         unsigned long i;
2363
2364         for (i = 0; i < num_pages; i++) {
2365                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2366                 if (!pages[i]) {
2367                         /*
2368                          * save number of pages we have already allocated and
2369                          * return with ENOMEM error
2370                          */
2371                         num_pages = i;
2372                         rc = -ENOMEM;
2373                         break;
2374                 }
2375         }
2376
2377         if (rc) {
2378                 for (i = 0; i < num_pages; i++)
2379                         put_page(pages[i]);
2380         }
2381         return rc;
2382 }
2383
2384 static inline
2385 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2386 {
2387         size_t num_pages;
2388         size_t clen;
2389
2390         clen = min_t(const size_t, len, wsize);
2391         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2392
2393         if (cur_len)
2394                 *cur_len = clen;
2395
2396         return num_pages;
2397 }
2398
2399 static void
2400 cifs_uncached_writedata_release(struct kref *refcount)
2401 {
2402         int i;
2403         struct cifs_writedata *wdata = container_of(refcount,
2404                                         struct cifs_writedata, refcount);
2405
2406         for (i = 0; i < wdata->nr_pages; i++)
2407                 put_page(wdata->pages[i]);
2408         cifs_writedata_release(refcount);
2409 }
2410
2411 static void
2412 cifs_uncached_writev_complete(struct work_struct *work)
2413 {
2414         struct cifs_writedata *wdata = container_of(work,
2415                                         struct cifs_writedata, work);
2416         struct inode *inode = d_inode(wdata->cfile->dentry);
2417         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2418
2419         spin_lock(&inode->i_lock);
2420         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2421         if (cifsi->server_eof > inode->i_size)
2422                 i_size_write(inode, cifsi->server_eof);
2423         spin_unlock(&inode->i_lock);
2424
2425         complete(&wdata->done);
2426
2427         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2428 }
2429
2430 static int
2431 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2432                       size_t *len, unsigned long *num_pages)
2433 {
2434         size_t save_len, copied, bytes, cur_len = *len;
2435         unsigned long i, nr_pages = *num_pages;
2436
2437         save_len = cur_len;
2438         for (i = 0; i < nr_pages; i++) {
2439                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2440                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2441                 cur_len -= copied;
2442                 /*
2443                  * If we didn't copy as much as we expected, then that
2444                  * may mean we trod into an unmapped area. Stop copying
2445                  * at that point. On the next pass through the big
2446                  * loop, we'll likely end up getting a zero-length
2447                  * write and bailing out of it.
2448                  */
2449                 if (copied < bytes)
2450                         break;
2451         }
2452         cur_len = save_len - cur_len;
2453         *len = cur_len;
2454
2455         /*
2456          * If we have no data to send, then that probably means that
2457          * the copy above failed altogether. That's most likely because
2458          * the address in the iovec was bogus. Return -EFAULT and let
2459          * the caller free anything we allocated and bail out.
2460          */
2461         if (!cur_len)
2462                 return -EFAULT;
2463
2464         /*
2465          * i + 1 now represents the number of pages we actually used in
2466          * the copy phase above.
2467          */
2468         *num_pages = i + 1;
2469         return 0;
2470 }
2471
2472 static int
2473 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2474                      struct cifsFileInfo *open_file,
2475                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2476 {
2477         int rc = 0;
2478         size_t cur_len;
2479         unsigned long nr_pages, num_pages, i;
2480         struct cifs_writedata *wdata;
2481         struct iov_iter saved_from;
2482         loff_t saved_offset = offset;
2483         pid_t pid;
2484         struct TCP_Server_Info *server;
2485
2486         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2487                 pid = open_file->pid;
2488         else
2489                 pid = current->tgid;
2490
2491         server = tlink_tcon(open_file->tlink)->ses->server;
2492         memcpy(&saved_from, from, sizeof(struct iov_iter));
2493
2494         do {
2495                 unsigned int wsize, credits;
2496
2497                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2498                                                    &wsize, &credits);
2499                 if (rc)
2500                         break;
2501
2502                 nr_pages = get_numpages(wsize, len, &cur_len);
2503                 wdata = cifs_writedata_alloc(nr_pages,
2504                                              cifs_uncached_writev_complete);
2505                 if (!wdata) {
2506                         rc = -ENOMEM;
2507                         add_credits_and_wake_if(server, credits, 0);
2508                         break;
2509                 }
2510
2511                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2512                 if (rc) {
2513                         kfree(wdata);
2514                         add_credits_and_wake_if(server, credits, 0);
2515                         break;
2516                 }
2517
2518                 num_pages = nr_pages;
2519                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2520                 if (rc) {
2521                         for (i = 0; i < nr_pages; i++)
2522                                 put_page(wdata->pages[i]);
2523                         kfree(wdata);
2524                         add_credits_and_wake_if(server, credits, 0);
2525                         break;
2526                 }
2527
2528                 /*
2529                  * Bring nr_pages down to the number of pages we actually used,
2530                  * and free any pages that we didn't use.
2531                  */
2532                 for ( ; nr_pages > num_pages; nr_pages--)
2533                         put_page(wdata->pages[nr_pages - 1]);
2534
2535                 wdata->sync_mode = WB_SYNC_ALL;
2536                 wdata->nr_pages = nr_pages;
2537                 wdata->offset = (__u64)offset;
2538                 wdata->cfile = cifsFileInfo_get(open_file);
2539                 wdata->pid = pid;
2540                 wdata->bytes = cur_len;
2541                 wdata->pagesz = PAGE_SIZE;
2542                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2543                 wdata->credits = credits;
2544
2545                 if (!wdata->cfile->invalidHandle ||
2546                     !cifs_reopen_file(wdata->cfile, false))
2547                         rc = server->ops->async_writev(wdata,
2548                                         cifs_uncached_writedata_release);
2549                 if (rc) {
2550                         add_credits_and_wake_if(server, wdata->credits, 0);
2551                         kref_put(&wdata->refcount,
2552                                  cifs_uncached_writedata_release);
2553                         if (rc == -EAGAIN) {
2554                                 memcpy(from, &saved_from,
2555                                        sizeof(struct iov_iter));
2556                                 iov_iter_advance(from, offset - saved_offset);
2557                                 continue;
2558                         }
2559                         break;
2560                 }
2561
2562                 list_add_tail(&wdata->list, wdata_list);
2563                 offset += cur_len;
2564                 len -= cur_len;
2565         } while (len > 0);
2566
2567         return rc;
2568 }
2569
     /*
      * Uncached write: send the data described by @from to the server with the
      * async_writev operation, starting at iocb->ki_pos.
      *
      * The data is split into requests by cifs_write_from_iter(). This function
      * then waits for each queued request in list order and resubmits any that
      * completed with -EAGAIN. Once an error is seen, the remaining requests are
      * released without being waited for.
      *
      * Returns the number of bytes reported written, in which case iocb->ki_pos
      * is advanced and CIFS_INO_INVALID_MAPPING is set on the inode. When nothing
      * was written it returns rc instead: the non-positive result of
      * generic_write_checks(), -ENOSYS if the server has no async_writev
      * operation, or the error from submitting/waiting.
      */
2570 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2571 {
2572         struct file *file = iocb->ki_filp;
2573         ssize_t total_written = 0;
2574         struct cifsFileInfo *open_file;
2575         struct cifs_tcon *tcon;
2576         struct cifs_sb_info *cifs_sb;
2577         struct cifs_writedata *wdata, *tmp;
2578         struct list_head wdata_list;
2579         struct iov_iter saved_from;
2580         int rc;
2581
2582         /*
2583          * BB - optimize the way when signing is disabled. We can drop this
2584          * extra memory-to-memory copying and use iovec buffers for constructing
2585          * write request.
2586          */
2587
2588         rc = generic_write_checks(iocb, from);
2589         if (rc <= 0)
2590                 return rc;
2591
2592         INIT_LIST_HEAD(&wdata_list);
2593         cifs_sb = CIFS_FILE_SB(file);
2594         open_file = file->private_data;
2595         tcon = tlink_tcon(open_file->tlink);
2596
2597         if (!tcon->ses->server->ops->async_writev)
2598                 return -ENOSYS;
2599
             /*
              * Keep a copy of the iterator as it is before any data is consumed;
              * a resend below rebuilds its data position from this copy.
              */
2600         memcpy(&saved_from, from, sizeof(struct iov_iter));
2601
2602         rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2603                                   open_file, cifs_sb, &wdata_list);
2604
2605         /*
2606          * If at least one write was successfully sent, then discard any rc
2607          * value from the later writes. If the other write succeeds, then
2608          * we'll end up returning whatever was written. If it fails, then
2609          * we'll get a new rc value from that.
2610          */
2611         if (!list_empty(&wdata_list))
2612                 rc = 0;
2613
2614         /*
2615          * Wait for and collect replies for any successful sends in order of
2616          * increasing offset. Once an error is hit or we get a fatal signal
2617          * while waiting, then return without waiting for any more replies.
2618          */
2619 restart_loop:
2620         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2621                 if (!rc) {
2622                         /* FIXME: freezable too? */
2623                         rc = wait_for_completion_killable(&wdata->done);
2624                         if (rc)
2625                                 rc = -EINTR;
2626                         else if (wdata->result)
2627                                 rc = wdata->result;
2628                         else
2629                                 total_written += wdata->bytes;
2630
2631                         /* resend call if it's a retryable error */
2632                         if (rc == -EAGAIN) {
2633                                 struct list_head tmp_list;
2634                                 struct iov_iter tmp_from;
2635
2636                                 INIT_LIST_HEAD(&tmp_list);
2637                                 list_del_init(&wdata->list);
2638
                                     /*
                                      * Rebuild an iterator positioned at the
                                      * data of the failed request: start from
                                      * the saved copy and skip the bytes that
                                      * precede wdata->offset.
                                      */
2639                                 memcpy(&tmp_from, &saved_from,
2640                                        sizeof(struct iov_iter));
2641                                 iov_iter_advance(&tmp_from,
2642                                                  wdata->offset - iocb->ki_pos);
2643
2644                                 rc = cifs_write_from_iter(wdata->offset,
2645                                                 wdata->bytes, &tmp_from,
2646                                                 open_file, cifs_sb, &tmp_list);
2647
                                     /*
                                      * The replacement requests go to the front
                                      * of wdata_list, so they are the first ones
                                      * examined after the restart.
                                      */
2648                                 list_splice(&tmp_list, &wdata_list);
2649
2650                                 kref_put(&wdata->refcount,
2651                                          cifs_uncached_writedata_release);
2652                                 goto restart_loop;
2653                         }
2654                 }
                     /* done with this request, whether waited for or skipped */
2655                 list_del_init(&wdata->list);
2656                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2657         }
2658
2659         if (unlikely(!total_written))
2660                 return rc;
2661
2662         iocb->ki_pos += total_written;
2663         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2664         cifs_stats_bytes_written(tcon, total_written);
2665         return total_written;
2666 }
2667
2668 static ssize_t
2669 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2670 {
2671         struct file *file = iocb->ki_filp;
2672         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2673         struct inode *inode = file->f_mapping->host;
2674         struct cifsInodeInfo *cinode = CIFS_I(inode);
2675         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2676         ssize_t rc;
2677
2678         /*
2679          * We need to hold the sem to be sure nobody modifies lock list
2680          * with a brlock that prevents writing.
2681          */
2682         down_read(&cinode->lock_sem);
2683         inode_lock(inode);
2684
2685         rc = generic_write_checks(iocb, from);
2686         if (rc <= 0)
2687                 goto out;
2688
2689         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2690                                      server->vals->exclusive_lock_type, NULL,
2691                                      CIFS_WRITE_OP))
2692                 rc = __generic_file_write_iter(iocb, from);
2693         else
2694                 rc = -EACCES;
2695 out:
2696         inode_unlock(inode);
2697
2698         if (rc > 0)
2699                 rc = generic_write_sync(iocb, rc);
2700         up_read(&cinode->lock_sem);
2701         return rc;
2702 }
2703
2704 ssize_t
2705 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2706 {
2707         struct inode *inode = file_inode(iocb->ki_filp);
2708         struct cifsInodeInfo *cinode = CIFS_I(inode);
2709         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2710         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2711                                                 iocb->ki_filp->private_data;
2712         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2713         ssize_t written;
2714
2715         written = cifs_get_writer(cinode);
2716         if (written)
2717                 return written;
2718
2719         if (CIFS_CACHE_WRITE(cinode)) {
2720                 if (cap_unix(tcon->ses) &&
2721                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2722                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2723                         written = generic_file_write_iter(iocb, from);
2724                         goto out;
2725                 }
2726                 written = cifs_writev(iocb, from);
2727                 goto out;
2728         }
2729         /*
2730          * For non-oplocked files in strict cache mode we need to write the data
2731          * to the server exactly from the pos to pos+len-1 rather than flush all
2732          * affected pages because it may cause a error with mandatory locks on
2733          * these pages but not on the region from pos to ppos+len-1.
2734          */
2735         written = cifs_user_writev(iocb, from);
2736         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2737                 /*
2738                  * Windows 7 server can delay breaking level2 oplock if a write
2739                  * request comes - break it on the client to prevent reading
2740                  * an old data.
2741                  */
2742                 cifs_zap_mapping(inode);
2743                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2744                          inode);
2745                 cinode->oplock = 0;
2746         }
2747 out:
2748         cifs_put_writer(cinode);
2749         return written;
2750 }
2751
2752 static struct cifs_readdata *
2753 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2754 {
2755         struct cifs_readdata *rdata;
2756
2757         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2758                         GFP_KERNEL);
2759         if (rdata != NULL) {
2760                 kref_init(&rdata->refcount);
2761                 INIT_LIST_HEAD(&rdata->list);
2762                 init_completion(&rdata->done);
2763                 INIT_WORK(&rdata->work, complete);
2764         }
2765
2766         return rdata;
2767 }
2768
2769 void
2770 cifs_readdata_release(struct kref *refcount)
2771 {
2772         struct cifs_readdata *rdata = container_of(refcount,
2773                                         struct cifs_readdata, refcount);
2774
2775         if (rdata->cfile)
2776                 cifsFileInfo_put(rdata->cfile);
2777
2778         kfree(rdata);
2779 }
2780
2781 static int
2782 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2783 {
2784         int rc = 0;
2785         struct page *page;
2786         unsigned int i;
2787
2788         for (i = 0; i < nr_pages; i++) {
2789                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2790                 if (!page) {
2791                         rc = -ENOMEM;
2792                         break;
2793                 }
2794                 rdata->pages[i] = page;
2795         }
2796
2797         if (rc) {
2798                 for (i = 0; i < nr_pages; i++) {
2799                         put_page(rdata->pages[i]);
2800                         rdata->pages[i] = NULL;
2801                 }
2802         }
2803         return rc;
2804 }
2805
2806 static void
2807 cifs_uncached_readdata_release(struct kref *refcount)
2808 {
2809         struct cifs_readdata *rdata = container_of(refcount,
2810                                         struct cifs_readdata, refcount);
2811         unsigned int i;
2812
2813         for (i = 0; i < rdata->nr_pages; i++) {
2814                 put_page(rdata->pages[i]);
2815                 rdata->pages[i] = NULL;
2816         }
2817         cifs_readdata_release(refcount);
2818 }
2819
2820 /**
2821  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2822  * @rdata:      the readdata response with list of pages holding data
2823  * @iter:       destination for our data
2824  *
2825  * This function copies data from a list of pages in a readdata response into
2826  * an array of iovecs. It will first calculate where the data should go
2827  * based on the info in the readdata and then copy the data into that spot.
2828  */
2829 static int
2830 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2831 {
2832         size_t remaining = rdata->got_bytes;
2833         unsigned int i;
2834
2835         for (i = 0; i < rdata->nr_pages; i++) {
2836                 struct page *page = rdata->pages[i];
2837                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2838                 size_t written = copy_page_to_iter(page, 0, copy, iter);
2839                 remaining -= written;
2840                 if (written < copy && iov_iter_count(iter) > 0)
2841                         break;
2842         }
2843         return remaining ? -EFAULT : 0;
2844 }
2845
2846 static void
2847 cifs_uncached_readv_complete(struct work_struct *work)
2848 {
2849         struct cifs_readdata *rdata = container_of(work,
2850                                                 struct cifs_readdata, work);
2851
2852         complete(&rdata->done);
2853         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2854 }
2855
2856 static int
2857 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2858                         struct cifs_readdata *rdata, unsigned int len)
2859 {
2860         int result = 0;
2861         unsigned int i;
2862         unsigned int nr_pages = rdata->nr_pages;
2863
2864         rdata->got_bytes = 0;
2865         rdata->tailsz = PAGE_SIZE;
2866         for (i = 0; i < nr_pages; i++) {
2867                 struct page *page = rdata->pages[i];
2868                 size_t n;
2869
2870                 if (len <= 0) {
2871                         /* no need to hold page hostage */
2872                         rdata->pages[i] = NULL;
2873                         rdata->nr_pages--;
2874                         put_page(page);
2875                         continue;
2876                 }
2877                 n = len;
2878                 if (len >= PAGE_SIZE) {
2879                         /* enough data to fill the page */
2880                         n = PAGE_SIZE;
2881                         len -= n;
2882                 } else {
2883                         zero_user(page, len, PAGE_SIZE - len);
2884                         rdata->tailsz = len;
2885                         len = 0;
2886                 }
2887                 result = cifs_read_page_from_socket(server, page, n);
2888                 if (result < 0)
2889                         break;
2890
2891                 rdata->got_bytes += result;
2892         }
2893
2894         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2895                                                 rdata->got_bytes : result;
2896 }
2897
2898 static int
2899 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2900                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2901 {
2902         struct cifs_readdata *rdata;
2903         unsigned int npages, rsize, credits;
2904         size_t cur_len;
2905         int rc;
2906         pid_t pid;
2907         struct TCP_Server_Info *server;
2908
2909         server = tlink_tcon(open_file->tlink)->ses->server;
2910
2911         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2912                 pid = open_file->pid;
2913         else
2914                 pid = current->tgid;
2915
2916         do {
2917                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2918                                                    &rsize, &credits);
2919                 if (rc)
2920                         break;
2921
2922                 cur_len = min_t(const size_t, len, rsize);
2923                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2924
2925                 /* allocate a readdata struct */
2926                 rdata = cifs_readdata_alloc(npages,
2927                                             cifs_uncached_readv_complete);
2928                 if (!rdata) {
2929                         add_credits_and_wake_if(server, credits, 0);
2930                         rc = -ENOMEM;
2931                         break;
2932                 }
2933
2934                 rc = cifs_read_allocate_pages(rdata, npages);
2935                 if (rc)
2936                         goto error;
2937
2938                 rdata->cfile = cifsFileInfo_get(open_file);
2939                 rdata->nr_pages = npages;
2940                 rdata->offset = offset;
2941                 rdata->bytes = cur_len;
2942                 rdata->pid = pid;
2943                 rdata->pagesz = PAGE_SIZE;
2944                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2945                 rdata->credits = credits;
2946
2947                 if (!rdata->cfile->invalidHandle ||
2948                     !cifs_reopen_file(rdata->cfile, true))
2949                         rc = server->ops->async_readv(rdata);
2950 error:
2951                 if (rc) {
2952                         add_credits_and_wake_if(server, rdata->credits, 0);
2953                         kref_put(&rdata->refcount,
2954                                  cifs_uncached_readdata_release);
2955                         if (rc == -EAGAIN)
2956                                 continue;
2957                         break;
2958                 }
2959
2960                 list_add_tail(&rdata->list, rdata_list);
2961                 offset += cur_len;
2962                 len -= cur_len;
2963         } while (len > 0);
2964
2965         return rc;
2966 }
2967
     /*
      * Uncached read: request iov_iter_count(@to) bytes starting at iocb->ki_pos
      * from the server with the async_readv operation and copy the replies
      * into @to.
      *
      * The requests are submitted by cifs_send_async_read(). This function then
      * waits for each queued request in list order. A request that completed
      * with -EAGAIN is resubmitted for the part not yet received, after any
      * partial data it did deliver has been copied out. Once an error is seen,
      * the remaining requests are released without being waited for.
      *
      * Returns the number of bytes copied into @to (iocb->ki_pos is advanced by
      * the same amount). When nothing was copied it returns 0 for an empty
      * request, -ENOSYS if the server has no async_readv operation, or rc with
      * -ENODATA reported as 0.
      */
2968 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2969 {
2970         struct file *file = iocb->ki_filp;
2971         ssize_t rc;
2972         size_t len;
2973         ssize_t total_read = 0;
2974         loff_t offset = iocb->ki_pos;
2975         struct cifs_sb_info *cifs_sb;
2976         struct cifs_tcon *tcon;
2977         struct cifsFileInfo *open_file;
2978         struct cifs_readdata *rdata, *tmp;
2979         struct list_head rdata_list;
2980
2981         len = iov_iter_count(to);
2982         if (!len)
2983                 return 0;
2984
2985         INIT_LIST_HEAD(&rdata_list);
2986         cifs_sb = CIFS_FILE_SB(file);
2987         open_file = file->private_data;
2988         tcon = tlink_tcon(open_file->tlink);
2989
2990         if (!tcon->ses->server->ops->async_readv)
2991                 return -ENOSYS;
2992
2993         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2994                 cifs_dbg(FYI, "attempting read on write only file instance\n");
2995
2996         rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
2997
2998         /* if at least one read request send succeeded, then reset rc */
2999         if (!list_empty(&rdata_list))
3000                 rc = 0;
3001
             /* remember the initial size of @to; total_read is derived from it */
3002         len = iov_iter_count(to);
3003         /* the loop below should proceed in the order of increasing offsets */
3004 again:
3005         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3006                 if (!rc) {
3007                         /* FIXME: freezable sleep too? */
3008                         rc = wait_for_completion_killable(&rdata->done);
3009                         if (rc)
3010                                 rc = -EINTR;
3011                         else if (rdata->result == -EAGAIN) {
3012                                 /* resend call if it's a retryable error */
3013                                 struct list_head tmp_list;
3014                                 unsigned int got_bytes = rdata->got_bytes;
3015
3016                                 list_del_init(&rdata->list);
3017                                 INIT_LIST_HEAD(&tmp_list);
3018
3019                                 /*
3020                                  * Got a part of data and then reconnect has
3021                                  * happened -- fill the buffer and continue
3022                                  * reading.
3023                                  */
3024                                 if (got_bytes && got_bytes < rdata->bytes) {
3025                                         rc = cifs_readdata_to_iov(rdata, to);
3026                                         if (rc) {
                                                     /*
                                                      * rdata is already off the
                                                      * list; drop it and let the
                                                      * loop release the rest
                                                      * with rc set.
                                                      */
3027                                                 kref_put(&rdata->refcount,
3028                                                 cifs_uncached_readdata_release);
3029                                                 continue;
3030                                         }
3031                                 }
3032
                                     /* ask again for the bytes still missing */
3033                                 rc = cifs_send_async_read(
3034                                                 rdata->offset + got_bytes,
3035                                                 rdata->bytes - got_bytes,
3036                                                 rdata->cfile, cifs_sb,
3037                                                 &tmp_list);
3038
                                     /*
                                      * The replacement requests go to the front
                                      * of rdata_list, so they are the first ones
                                      * examined after the restart.
                                      */
3039                                 list_splice(&tmp_list, &rdata_list);
3040
3041                                 kref_put(&rdata->refcount,
3042                                          cifs_uncached_readdata_release);
3043                                 goto again;
3044                         } else if (rdata->result)
3045                                 rc = rdata->result;
3046                         else
3047                                 rc = cifs_readdata_to_iov(rdata, to);
3048
3049                         /* if there was a short read -- discard anything left */
3050                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3051                                 rc = -ENODATA;
3052                 }
                     /* done with this request, whether waited for or skipped */
3053                 list_del_init(&rdata->list);
3054                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3055         }
3056
             /* bytes consumed from @to since the size was sampled above */
3057         total_read = len - iov_iter_count(to);
3058
3059         cifs_stats_bytes_read(tcon, total_read);
3060
3061         /* mask nodata case */
3062         if (rc == -ENODATA)
3063                 rc = 0;
3064
3065         if (total_read) {
3066                 iocb->ki_pos += total_read;
3067                 return total_read;
3068         }
3069         return rc;
3070 }
3071
3072 ssize_t
3073 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3074 {
3075         struct inode *inode = file_inode(iocb->ki_filp);
3076         struct cifsInodeInfo *cinode = CIFS_I(inode);
3077         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3078         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3079                                                 iocb->ki_filp->private_data;
3080         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3081         int rc = -EACCES;
3082
3083         /*
3084          * In strict cache mode we need to read from the server all the time
3085          * if we don't have level II oplock because the server can delay mtime
3086          * change - so we can't make a decision about inode invalidating.
3087          * And we can also fail with pagereading if there are mandatory locks
3088          * on pages affected by this read but not on the region from pos to
3089          * pos+len-1.
3090          */
3091         if (!CIFS_CACHE_READ(cinode))
3092                 return cifs_user_readv(iocb, to);
3093
3094         if (cap_unix(tcon->ses) &&
3095             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3096             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3097                 return generic_file_read_iter(iocb, to);
3098
3099         /*
3100          * We need to hold the sem to be sure nobody modifies lock list
3101          * with a brlock that prevents reading.
3102          */
3103         down_read(&cinode->lock_sem);
3104         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3105                                      tcon->ses->server->vals->shared_lock_type,
3106                                      NULL, CIFS_READ_OP))
3107                 rc = generic_file_read_iter(iocb, to);
3108         up_read(&cinode->lock_sem);
3109         return rc;
3110 }
3111
3112 static ssize_t
3113 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3114 {
3115         int rc = -EACCES;
3116         unsigned int bytes_read = 0;
3117         unsigned int total_read;
3118         unsigned int current_read_size;
3119         unsigned int rsize;
3120         struct cifs_sb_info *cifs_sb;
3121         struct cifs_tcon *tcon;
3122         struct TCP_Server_Info *server;
3123         unsigned int xid;
3124         char *cur_offset;
3125         struct cifsFileInfo *open_file;
3126         struct cifs_io_parms io_parms;
3127         int buf_type = CIFS_NO_BUFFER;
3128         __u32 pid;
3129
3130         xid = get_xid();
3131         cifs_sb = CIFS_FILE_SB(file);
3132
3133         /* FIXME: set up handlers for larger reads and/or convert to async */
3134         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3135
3136         if (file->private_data == NULL) {
3137                 rc = -EBADF;
3138                 free_xid(xid);
3139                 return rc;
3140         }
3141         open_file = file->private_data;
3142         tcon = tlink_tcon(open_file->tlink);
3143         server = tcon->ses->server;
3144
3145         if (!server->ops->sync_read) {
3146                 free_xid(xid);
3147                 return -ENOSYS;
3148         }
3149
3150         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3151                 pid = open_file->pid;
3152         else
3153                 pid = current->tgid;
3154
3155         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3156                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3157
3158         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3159              total_read += bytes_read, cur_offset += bytes_read) {
3160                 do {
3161                         current_read_size = min_t(uint, read_size - total_read,
3162                                                   rsize);
3163                         /*
3164                          * For windows me and 9x we do not want to request more
3165                          * than it negotiated since it will refuse the read
3166                          * then.
3167                          */
3168                         if ((tcon->ses) && !(tcon->ses->capabilities &
3169                                 tcon->ses->server->vals->cap_large_files)) {
3170                                 current_read_size = min_t(uint,
3171                                         current_read_size, CIFSMaxBufSize);
3172                         }
3173                         if (open_file->invalidHandle) {
3174                                 rc = cifs_reopen_file(open_file, true);
3175                                 if (rc != 0)
3176                                         break;
3177                         }
3178                         io_parms.pid = pid;
3179                         io_parms.tcon = tcon;
3180                         io_parms.offset = *offset;
3181                         io_parms.length = current_read_size;
3182                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3183                                                     &bytes_read, &cur_offset,
3184                                                     &buf_type);
3185                 } while (rc == -EAGAIN);
3186
3187                 if (rc || (bytes_read == 0)) {
3188                         if (total_read) {
3189                                 break;
3190                         } else {
3191                                 free_xid(xid);
3192                                 return rc;
3193                         }
3194                 } else {
3195                         cifs_stats_bytes_read(tcon, total_read);
3196                         *offset += bytes_read;
3197                 }
3198         }
3199         free_xid(xid);
3200         return total_read;
3201 }
3202
3203 /*
3204  * If the page is mmap'ed into a process' page tables, then we need to make
3205  * sure that it doesn't change while being written back.
3206  */
3207 static int
3208 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3209 {
3210         struct page *page = vmf->page;
3211
3212         lock_page(page);
3213         return VM_FAULT_LOCKED;
3214 }
3215
3216 static const struct vm_operations_struct cifs_file_vm_ops = {
3217         .fault = filemap_fault,
3218         .map_pages = filemap_map_pages,
3219         .page_mkwrite = cifs_page_mkwrite,
3220 };
3221
3222 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3223 {
3224         int rc, xid;
3225         struct inode *inode = file_inode(file);
3226
3227         xid = get_xid();
3228
3229         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3230                 rc = cifs_zap_mapping(inode);
3231                 if (rc)
3232                         return rc;
3233         }
3234
3235         rc = generic_file_mmap(file, vma);
3236         if (rc == 0)
3237                 vma->vm_ops = &cifs_file_vm_ops;
3238         free_xid(xid);
3239         return rc;
3240 }
3241
3242 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3243 {
3244         int rc, xid;
3245
3246         xid = get_xid();
3247         rc = cifs_revalidate_file(file);
3248         if (rc) {
3249                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3250                          rc);
3251                 free_xid(xid);
3252                 return rc;
3253         }
3254         rc = generic_file_mmap(file, vma);
3255         if (rc == 0)
3256                 vma->vm_ops = &cifs_file_vm_ops;
3257         free_xid(xid);
3258         return rc;
3259 }
3260
3261 static void
3262 cifs_readv_complete(struct work_struct *work)
3263 {
3264         unsigned int i, got_bytes;
3265         struct cifs_readdata *rdata = container_of(work,
3266                                                 struct cifs_readdata, work);
3267
3268         got_bytes = rdata->got_bytes;
3269         for (i = 0; i < rdata->nr_pages; i++) {
3270                 struct page *page = rdata->pages[i];
3271
3272                 lru_cache_add_file(page);
3273
3274                 if (rdata->result == 0 ||
3275                     (rdata->result == -EAGAIN && got_bytes)) {
3276                         flush_dcache_page(page);
3277                         SetPageUptodate(page);
3278                 }
3279
3280                 unlock_page(page);
3281
3282                 if (rdata->result == 0 ||
3283                     (rdata->result == -EAGAIN && got_bytes))
3284                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3285
3286                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3287
3288                 put_page(page);
3289                 rdata->pages[i] = NULL;
3290         }
3291         kref_put(&rdata->refcount, cifs_readdata_release);
3292 }
3293
3294 static int
3295 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3296                         struct cifs_readdata *rdata, unsigned int len)
3297 {
3298         int result = 0;
3299         unsigned int i;
3300         u64 eof;
3301         pgoff_t eof_index;
3302         unsigned int nr_pages = rdata->nr_pages;
3303
3304         /* determine the eof that the server (probably) has */
3305         eof = CIFS_I(rdata->mapping->host)->server_eof;
3306         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3307         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3308
3309         rdata->got_bytes = 0;
3310         rdata->tailsz = PAGE_SIZE;
3311         for (i = 0; i < nr_pages; i++) {
3312                 struct page *page = rdata->pages[i];
3313                 size_t n = PAGE_SIZE;
3314
3315                 if (len >= PAGE_SIZE) {
3316                         len -= PAGE_SIZE;
3317                 } else if (len > 0) {
3318                         /* enough for partial page, fill and zero the rest */
3319                         zero_user(page, len, PAGE_SIZE - len);
3320                         n = rdata->tailsz = len;
3321                         len = 0;
3322                 } else if (page->index > eof_index) {
3323                         /*
3324                          * The VFS will not try to do readahead past the
3325                          * i_size, but it's possible that we have outstanding
3326                          * writes with gaps in the middle and the i_size hasn't
3327                          * caught up yet. Populate those with zeroed out pages
3328                          * to prevent the VFS from repeatedly attempting to
3329                          * fill them until the writes are flushed.
3330                          */
3331                         zero_user(page, 0, PAGE_SIZE);
3332                         lru_cache_add_file(page);
3333                         flush_dcache_page(page);
3334                         SetPageUptodate(page);
3335                         unlock_page(page);
3336                         put_page(page);
3337                         rdata->pages[i] = NULL;
3338                         rdata->nr_pages--;
3339                         continue;
3340                 } else {
3341                         /* no need to hold page hostage */
3342                         lru_cache_add_file(page);
3343                         unlock_page(page);
3344                         put_page(page);
3345                         rdata->pages[i] = NULL;
3346                         rdata->nr_pages--;
3347                         continue;
3348                 }
3349
3350                 result = cifs_read_page_from_socket(server, page, n);
3351                 if (result < 0)
3352                         break;
3353
3354                 rdata->got_bytes += result;
3355         }
3356
3357         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3358                                                 rdata->got_bytes : result;
3359 }
3360
3361 static int
3362 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3363                     unsigned int rsize, struct list_head *tmplist,
3364                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3365 {
3366         struct page *page, *tpage;
3367         unsigned int expected_index;
3368         int rc;
3369         gfp_t gfp = readahead_gfp_mask(mapping);
3370
3371         INIT_LIST_HEAD(tmplist);
3372
3373         page = list_entry(page_list->prev, struct page, lru);
3374
3375         /*
3376          * Lock the page and put it in the cache. Since no one else
3377          * should have access to this page, we're safe to simply set
3378          * PG_locked without checking it first.
3379          */
3380         __SetPageLocked(page);
3381         rc = add_to_page_cache_locked(page, mapping,
3382                                       page->index, gfp);
3383
3384         /* give up if we can't stick it in the cache */
3385         if (rc) {
3386                 __ClearPageLocked(page);
3387                 return rc;
3388         }
3389
3390         /* move first page to the tmplist */
3391         *offset = (loff_t)page->index << PAGE_SHIFT;
3392         *bytes = PAGE_SIZE;
3393         *nr_pages = 1;
3394         list_move_tail(&page->lru, tmplist);
3395
3396         /* now try and add more pages onto the request */
3397         expected_index = page->index + 1;
3398         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3399                 /* discontinuity ? */
3400                 if (page->index != expected_index)
3401                         break;
3402
3403                 /* would this page push the read over the rsize? */
3404                 if (*bytes + PAGE_SIZE > rsize)
3405                         break;
3406
3407                 __SetPageLocked(page);
3408                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3409                         __ClearPageLocked(page);
3410                         break;
3411                 }
3412                 list_move_tail(&page->lru, tmplist);
3413                 (*bytes) += PAGE_SIZE;
3414                 expected_index++;
3415                 (*nr_pages)++;
3416         }
3417         return rc;
3418 }
3419
3420 static int cifs_readpages(struct file *file, struct address_space *mapping,
3421         struct list_head *page_list, unsigned num_pages)
3422 {
3423         int rc;
3424         struct list_head tmplist;
3425         struct cifsFileInfo *open_file = file->private_data;
3426         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3427         struct TCP_Server_Info *server;
3428         pid_t pid;
3429
3430         /*
3431          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3432          * immediately if the cookie is negative
3433          *
3434          * After this point, every page in the list might have PG_fscache set,
3435          * so we will need to clean that up off of every page we don't use.
3436          */
3437         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3438                                          &num_pages);
3439         if (rc == 0)
3440                 return rc;
3441
3442         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3443                 pid = open_file->pid;
3444         else
3445                 pid = current->tgid;
3446
3447         rc = 0;
3448         server = tlink_tcon(open_file->tlink)->ses->server;
3449
3450         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3451                  __func__, file, mapping, num_pages);
3452
3453         /*
3454          * Start with the page at end of list and move it to private
3455          * list. Do the same with any following pages until we hit
3456          * the rsize limit, hit an index discontinuity, or run out of
3457          * pages. Issue the async read and then start the loop again
3458          * until the list is empty.
3459          *
3460          * Note that list order is important. The page_list is in
3461          * the order of declining indexes. When we put the pages in
3462          * the rdata->pages, then we want them in increasing order.
3463          */
3464         while (!list_empty(page_list)) {
3465                 unsigned int i, nr_pages, bytes, rsize;
3466                 loff_t offset;
3467                 struct page *page, *tpage;
3468                 struct cifs_readdata *rdata;
3469                 unsigned credits;
3470
3471                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3472                                                    &rsize, &credits);
3473                 if (rc)
3474                         break;
3475
3476                 /*
3477                  * Give up immediately if rsize is too small to read an entire
3478                  * page. The VFS will fall back to readpage. We should never
3479                  * reach this point however since we set ra_pages to 0 when the
3480                  * rsize is smaller than a cache page.
3481                  */
3482                 if (unlikely(rsize < PAGE_SIZE)) {
3483                         add_credits_and_wake_if(server, credits, 0);
3484                         return 0;
3485                 }
3486
3487                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3488                                          &nr_pages, &offset, &bytes);
3489                 if (rc) {
3490                         add_credits_and_wake_if(server, credits, 0);
3491                         break;
3492                 }
3493
3494                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3495                 if (!rdata) {
3496                         /* best to give up if we're out of mem */
3497                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3498                                 list_del(&page->lru);
3499                                 lru_cache_add_file(page);
3500                                 unlock_page(page);
3501                                 put_page(page);
3502                         }
3503                         rc = -ENOMEM;
3504                         add_credits_and_wake_if(server, credits, 0);
3505                         break;
3506                 }
3507
3508                 rdata->cfile = cifsFileInfo_get(open_file);
3509                 rdata->mapping = mapping;
3510                 rdata->offset = offset;
3511                 rdata->bytes = bytes;
3512                 rdata->pid = pid;
3513                 rdata->pagesz = PAGE_SIZE;
3514                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3515                 rdata->credits = credits;
3516
3517                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3518                         list_del(&page->lru);
3519                         rdata->pages[rdata->nr_pages++] = page;
3520                 }
3521
3522                 if (!rdata->cfile->invalidHandle ||
3523                     !cifs_reopen_file(rdata->cfile, true))
3524                         rc = server->ops->async_readv(rdata);
3525                 if (rc) {
3526                         add_credits_and_wake_if(server, rdata->credits, 0);
3527                         for (i = 0; i < rdata->nr_pages; i++) {
3528                                 page = rdata->pages[i];
3529                                 lru_cache_add_file(page);
3530                                 unlock_page(page);
3531                                 put_page(page);
3532                         }
3533                         /* Fallback to the readpage in error/reconnect cases */
3534                         kref_put(&rdata->refcount, cifs_readdata_release);
3535                         break;
3536                 }
3537
3538                 kref_put(&rdata->refcount, cifs_readdata_release);
3539         }
3540
3541         /* Any pages that have been shown to fscache but didn't get added to
3542          * the pagecache must be uncached before they get returned to the
3543          * allocator.
3544          */
3545         cifs_fscache_readpages_cancel(mapping->host, page_list);
3546         return rc;
3547 }
3548
3549 /*
3550  * cifs_readpage_worker must be called with the page pinned
3551  */
3552 static int cifs_readpage_worker(struct file *file, struct page *page,
3553         loff_t *poffset)
3554 {
3555         char *read_data;
3556         int rc;
3557
3558         /* Is the page cached? */
3559         rc = cifs_readpage_from_fscache(file_inode(file), page);
3560         if (rc == 0)
3561                 goto read_complete;
3562
3563         read_data = kmap(page);
3564         /* for reads over a certain size could initiate async read ahead */
3565
3566         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3567
3568         if (rc < 0)
3569                 goto io_error;
3570         else
3571                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3572
3573         file_inode(file)->i_atime =
3574                 current_fs_time(file_inode(file)->i_sb);
3575
3576         if (PAGE_SIZE > rc)
3577                 memset(read_data + rc, 0, PAGE_SIZE - rc);
3578
3579         flush_dcache_page(page);
3580         SetPageUptodate(page);
3581
3582         /* send this page to the cache */
3583         cifs_readpage_to_fscache(file_inode(file), page);
3584
3585         rc = 0;
3586
3587 io_error:
3588         kunmap(page);
3589         unlock_page(page);
3590
3591 read_complete:
3592         return rc;
3593 }
3594
3595 static int cifs_readpage(struct file *file, struct page *page)
3596 {
3597         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3598         int rc = -EACCES;
3599         unsigned int xid;
3600
3601         xid = get_xid();
3602
3603         if (file->private_data == NULL) {
3604                 rc = -EBADF;
3605                 free_xid(xid);
3606                 return rc;
3607         }
3608
3609         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3610                  page, (int)offset, (int)offset);
3611
3612         rc = cifs_readpage_worker(file, page, &offset);
3613
3614         free_xid(xid);
3615         return rc;
3616 }
3617
3618 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3619 {
3620         struct cifsFileInfo *open_file;
3621
3622         spin_lock(&cifs_file_list_lock);
3623         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3624                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3625                         spin_unlock(&cifs_file_list_lock);
3626                         return 1;
3627                 }
3628         }
3629         spin_unlock(&cifs_file_list_lock);
3630         return 0;
3631 }
3632
3633 /* We do not want to update the file size from server for inodes
3634    open for write - to avoid races with writepage extending
3635    the file - in the future we could consider allowing
3636    refreshing the inode only on increases in the file size
3637    but this is tricky to do without racing with writebehind
3638    page caching in the current Linux kernel design */
3639 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3640 {
3641         if (!cifsInode)
3642                 return true;
3643
3644         if (is_inode_writable(cifsInode)) {
3645                 /* This inode is open for write at least once */
3646                 struct cifs_sb_info *cifs_sb;
3647
3648                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3649                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3650                         /* since no page cache to corrupt on directio
3651                         we can change size safely */
3652                         return true;
3653                 }
3654
3655                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3656                         return true;
3657
3658                 return false;
3659         } else
3660                 return true;
3661 }
3662
3663 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3664                         loff_t pos, unsigned len, unsigned flags,
3665                         struct page **pagep, void **fsdata)
3666 {
3667         int oncethru = 0;
3668         pgoff_t index = pos >> PAGE_SHIFT;
3669         loff_t offset = pos & (PAGE_SIZE - 1);
3670         loff_t page_start = pos & PAGE_MASK;
3671         loff_t i_size;
3672         struct page *page;
3673         int rc = 0;
3674
3675         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3676
3677 start:
3678         page = grab_cache_page_write_begin(mapping, index, flags);
3679         if (!page) {
3680                 rc = -ENOMEM;
3681                 goto out;
3682         }
3683
3684         if (PageUptodate(page))
3685                 goto out;
3686
3687         /*
3688          * If we write a full page it will be up to date, no need to read from
3689          * the server. If the write is short, we'll end up doing a sync write
3690          * instead.
3691          */
3692         if (len == PAGE_SIZE)
3693                 goto out;
3694
3695         /*
3696          * optimize away the read when we have an oplock, and we're not
3697          * expecting to use any of the data we'd be reading in. That
3698          * is, when the page lies beyond the EOF, or straddles the EOF
3699          * and the write will cover all of the existing data.
3700          */
3701         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3702                 i_size = i_size_read(mapping->host);
3703                 if (page_start >= i_size ||
3704                     (offset == 0 && (pos + len) >= i_size)) {
3705                         zero_user_segments(page, 0, offset,
3706                                            offset + len,
3707                                            PAGE_SIZE);
3708                         /*
3709                          * PageChecked means that the parts of the page
3710                          * to which we're not writing are considered up
3711                          * to date. Once the data is copied to the
3712                          * page, it can be set uptodate.
3713                          */
3714                         SetPageChecked(page);
3715                         goto out;
3716                 }
3717         }
3718
3719         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3720                 /*
3721                  * might as well read a page, it is fast enough. If we get
3722                  * an error, we don't need to return it. cifs_write_end will
3723                  * do a sync write instead since PG_uptodate isn't set.
3724                  */
3725                 cifs_readpage_worker(file, page, &page_start);
3726                 put_page(page);
3727                 oncethru = 1;
3728                 goto start;
3729         } else {
3730                 /* we could try using another file handle if there is one -
3731                    but how would we lock it to prevent close of that handle
3732                    racing with this read? In any case
3733                    this will be written out by write_end so is fine */
3734         }
3735 out:
3736         *pagep = page;
3737         return rc;
3738 }
3739
3740 static int cifs_release_page(struct page *page, gfp_t gfp)
3741 {
3742         if (PagePrivate(page))
3743                 return 0;
3744
3745         return cifs_fscache_release_page(page, gfp);
3746 }
3747
3748 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3749                                  unsigned int length)
3750 {
3751         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3752
3753         if (offset == 0 && length == PAGE_SIZE)
3754                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3755 }
3756
3757 static int cifs_launder_page(struct page *page)
3758 {
3759         int rc = 0;
3760         loff_t range_start = page_offset(page);
3761         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3762         struct writeback_control wbc = {
3763                 .sync_mode = WB_SYNC_ALL,
3764                 .nr_to_write = 0,
3765                 .range_start = range_start,
3766                 .range_end = range_end,
3767         };
3768
3769         cifs_dbg(FYI, "Launder page: %p\n", page);
3770
3771         if (clear_page_dirty_for_io(page))
3772                 rc = cifs_writepage_locked(page, &wbc);
3773
3774         cifs_fscache_invalidate_page(page, page->mapping->host);
3775         return rc;
3776 }
3777
3778 void cifs_oplock_break(struct work_struct *work)
3779 {
3780         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3781                                                   oplock_break);
3782         struct inode *inode = d_inode(cfile->dentry);
3783         struct cifsInodeInfo *cinode = CIFS_I(inode);
3784         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3785         struct TCP_Server_Info *server = tcon->ses->server;
3786         int rc = 0;
3787
3788         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3789                         TASK_UNINTERRUPTIBLE);
3790
3791         server->ops->downgrade_oplock(server, cinode,
3792                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3793
3794         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3795                                                 cifs_has_mand_locks(cinode)) {
3796                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3797                          inode);
3798                 cinode->oplock = 0;
3799         }
3800
3801         if (inode && S_ISREG(inode->i_mode)) {
3802                 if (CIFS_CACHE_READ(cinode))
3803                         break_lease(inode, O_RDONLY);
3804                 else
3805                         break_lease(inode, O_WRONLY);
3806                 rc = filemap_fdatawrite(inode->i_mapping);
3807                 if (!CIFS_CACHE_READ(cinode)) {
3808                         rc = filemap_fdatawait(inode->i_mapping);
3809                         mapping_set_error(inode->i_mapping, rc);
3810                         cifs_zap_mapping(inode);
3811                 }
3812                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3813         }
3814
3815         rc = cifs_push_locks(cfile);
3816         if (rc)
3817                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3818
3819         /*
3820          * releasing stale oplock after recent reconnect of smb session using
3821          * a now incorrect file handle is not a data integrity issue but do
3822          * not bother sending an oplock release if session to server still is
3823          * disconnected since oplock already released by the server
3824          */
3825         if (!cfile->oplock_break_cancelled) {
3826                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3827                                                              cinode);
3828                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3829         }
3830         cifs_done_oplock_break(cinode);
3831 }
3832
3833 /*
3834  * The presence of cifs_direct_io() in the address space ops vector
3835  * allowes open() O_DIRECT flags which would have failed otherwise.
3836  *
3837  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3838  * so this method should never be called.
3839  *
3840  * Direct IO is not yet supported in the cached mode. 
3841  */
3842 static ssize_t
3843 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3844 {
3845         /*
3846          * FIXME
3847          * Eventually need to support direct IO for non forcedirectio mounts
3848          */
3849         return -EINVAL;
3850 }
3851
3852
3853 const struct address_space_operations cifs_addr_ops = {
3854         .readpage = cifs_readpage,
3855         .readpages = cifs_readpages,
3856         .writepage = cifs_writepage,
3857         .writepages = cifs_writepages,
3858         .write_begin = cifs_write_begin,
3859         .write_end = cifs_write_end,
3860         .set_page_dirty = __set_page_dirty_nobuffers,
3861         .releasepage = cifs_release_page,
3862         .direct_IO = cifs_direct_io,
3863         .invalidatepage = cifs_invalidate_page,
3864         .launder_page = cifs_launder_page,
3865 };
3866
3867 /*
3868  * cifs_readpages requires the server to support a buffer large enough to
3869  * contain the header plus one complete page of data.  Otherwise, we need
3870  * to leave cifs_readpages out of the address space operations.
3871  */
3872 const struct address_space_operations cifs_addr_ops_smallbuf = {
3873         .readpage = cifs_readpage,
3874         .writepage = cifs_writepage,
3875         .writepages = cifs_writepages,
3876         .write_begin = cifs_write_begin,
3877         .write_end = cifs_write_end,
3878         .set_page_dirty = __set_page_dirty_nobuffers,
3879         .releasepage = cifs_release_page,
3880         .invalidatepage = cifs_invalidate_page,
3881         .launder_page = cifs_launder_page,
3882 };