CIFS: Fix persistent handles re-opening on reconnect
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

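/*
 * Convert VFS open flags to the NT desired access bits requested in an
 * SMB open, based on the access mode portion of the flags.
 */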
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too broad a permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

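/*
 * Convert VFS open flags to the SMB_O_* flags used by the CIFS POSIX
 * extensions open call.
 */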
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

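/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combination in the open flags to the
 * corresponding CIFS create disposition (see the mapping table in
 * cifs_nt_open()).
 */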
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

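/*
 * Open a file using the CIFS POSIX extensions. On success, optionally
 * instantiate a new inode (or update an existing one) from the metadata
 * the server returned.
 */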
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

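/*
 * Open a file with a regular NT-style create request and then refresh the
 * cached inode information from the server.
 */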
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that no POSIX flag combination maps directly to the
 *      FILE_SUPERSEDE disposition (i.e. create whether or not the file
 *      exists). O_CREAT | O_TRUNC is similar, but it truncates the
 *      existing file rather than replacing it with a new file as
 *      FILE_SUPERSEDE does (which uses the attributes / metadata passed
 *      in on the open call).
 *
 *      O_SYNC is a reasonable match for the CIFS writethrough flag,
 *      and the read/write flags match reasonably as well.  O_LARGEFILE
 *      is irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

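/*
 * Return true if any open instance of this inode holds cached byte-range
 * (mandatory style) locks.
 */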
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

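/*
 * Allocate and initialize the private data for a newly opened file, link it
 * into the per-tcon and per-inode lists of open files, and apply the oplock
 * the server granted (taking any pending lease break into account).
 */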
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);

        /* if readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

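/*
 * ->open() for regular files: try a posix open when the server supports the
 * CIFS unix extensions, otherwise fall back to a regular NT-style open.
 */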
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

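/*
 * Reopen a file handle that was invalidated, e.g. by a reconnect after the
 * session to the server was lost, and restore byte-range locks if needed.
 */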
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here, and we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

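/*
 * Walk the list of files open on this tree connection and reopen any handles
 * that were invalidated by a reconnect, so that persistent handles are
 * re-established on the server.
 */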
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                cifs_reopen_file(open_file, false /* do not flush */);
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

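/*
 * ->release() for directories: close an in-progress readdir handle on the
 * server if necessary and free the cached search results.
 */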
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

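/* Allocate and initialize a byte-range lock record for the current task. */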
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

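/*
 * Wake up all lock requests that are blocked waiting on the given lock and
 * unlink them from its blocking list.
 */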
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (no op), CIFS_READ_OP (read) or CIFS_WRITE_OP (write) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

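/*
 * Check every open instance of the inode for a lock that conflicts with the
 * given range.
 */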
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send the request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

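/* Add an already initialized lock record to the file's list of locks. */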
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send the request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

1047
1048 /*
1049  * Set the byte-range lock (posix style). Returns:
1050  * 1) 0, if we set the lock and don't need to request to the server;
1051  * 2) 1, if we need to request to the server;
1052  * 3) <0, if the error occurs while setting the lock.
1053  */
1054 static int
1055 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1056 {
1057         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1058         int rc = 1;
1059
1060         if ((flock->fl_flags & FL_POSIX) == 0)
1061                 return rc;
1062
1063 try_again:
1064         down_write(&cinode->lock_sem);
1065         if (!cinode->can_cache_brlcks) {
1066                 up_write(&cinode->lock_sem);
1067                 return rc;
1068         }
1069
1070         rc = posix_lock_file(file, flock, NULL);
1071         up_write(&cinode->lock_sem);
1072         if (rc == FILE_LOCK_DEFERRED) {
1073                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1074                 if (!rc)
1075                         goto try_again;
1076                 posix_unblock_lock(flock);
1077         }
1078         return rc;
1079 }
1080
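/*
 * Push all cached mandatory style byte-range locks for this file to the
 * server, batching as many ranges as fit into one LOCKING_ANDX request.
 */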
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

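/* Hash the local lock owner into the pid value sent with posix lock calls. */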
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

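/*
 * Push all cached posix byte-range locks for this inode to the server. The
 * lock_to_push records are preallocated since no new FL_POSIX locks can be
 * added while cinode->lock_sem is held.
 */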
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated structure */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

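/*
 * Decode a file_lock into the on-the-wire lock type and whether this is a
 * lock, an unlock and/or a blocking request, logging the flags we saw.
 */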
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

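/*
 * Test for a conflicting lock: first against the locally cached locks, then
 * on the server by temporarily acquiring and releasing the range if needed.
 */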
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

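/*
 * Remove cached byte-range locks that fall entirely within the given unlock
 * range, sending batched unlock requests to the server and restoring the
 * cached locks if a request fails.
 */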
1417 int
1418 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1419                   unsigned int xid)
1420 {
1421         int rc = 0, stored_rc;
1422         int types[] = {LOCKING_ANDX_LARGE_FILES,
1423                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1424         unsigned int i;
1425         unsigned int max_num, num, max_buf;
1426         LOCKING_ANDX_RANGE *buf, *cur;
1427         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1428         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1429         struct cifsLockInfo *li, *tmp;
1430         __u64 length = 1 + flock->fl_end - flock->fl_start;
1431         struct list_head tmp_llist;
1432
1433         INIT_LIST_HEAD(&tmp_llist);
1434
1435         /*
1436          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1437          * and check it for zero before using.
1438          */
1439         max_buf = tcon->ses->server->maxBuf;
1440         if (!max_buf)
1441                 return -EINVAL;
1442
1443         max_num = (max_buf - sizeof(struct smb_hdr)) /
1444                                                 sizeof(LOCKING_ANDX_RANGE);
1445         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1446         if (!buf)
1447                 return -ENOMEM;
1448
1449         down_write(&cinode->lock_sem);
1450         for (i = 0; i < 2; i++) {
1451                 cur = buf;
1452                 num = 0;
1453                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1454                         if (flock->fl_start > li->offset ||
1455                             (flock->fl_start + length) <
1456                             (li->offset + li->length))
1457                                 continue;
1458                         if (current->tgid != li->pid)
1459                                 continue;
1460                         if (types[i] != li->type)
1461                                 continue;
1462                         if (cinode->can_cache_brlcks) {
1463                                 /*
1464                                  * We can cache brlock requests - simply remove
1465                                  * a lock from the file's list.
1466                                  */
1467                                 list_del(&li->llist);
1468                                 cifs_del_lock_waiters(li);
1469                                 kfree(li);
1470                                 continue;
1471                         }
1472                         cur->Pid = cpu_to_le16(li->pid);
1473                         cur->LengthLow = cpu_to_le32((u32)li->length);
1474                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1475                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1476                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1477                         /*
1478                          * We need to save a lock here to let us add it again to
1479                          * the file's list if the unlock range request fails on
1480                          * the server.
1481                          */
1482                         list_move(&li->llist, &tmp_llist);
1483                         if (++num == max_num) {
1484                                 stored_rc = cifs_lockv(xid, tcon,
1485                                                        cfile->fid.netfid,
1486                                                        li->type, num, 0, buf);
1487                                 if (stored_rc) {
1488                                         /*
1489                                          * We failed on the unlock range
1490                                          * request - add all locks from the tmp
1491                                          * list to the head of the file's list.
1492                                          */
1493                                         cifs_move_llist(&tmp_llist,
1494                                                         &cfile->llist->locks);
1495                                         rc = stored_rc;
1496                                 } else
1497                                         /*
1498                                           * The unlock range request succeeded -
1499                                          * free the tmp list.
1500                                          */
1501                                         cifs_free_llist(&tmp_llist);
1502                                 cur = buf;
1503                                 num = 0;
1504                         } else
1505                                 cur++;
1506                 }
1507                 if (num) {
1508                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1509                                                types[i], num, 0, buf);
1510                         if (stored_rc) {
1511                                 cifs_move_llist(&tmp_llist,
1512                                                 &cfile->llist->locks);
1513                                 rc = stored_rc;
1514                         } else
1515                                 cifs_free_llist(&tmp_llist);
1516                 }
1517         }
1518
1519         up_write(&cinode->lock_sem);
1520         kfree(buf);
1521         return rc;
1522 }
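/*
 * A minimal, self-contained sketch (illustrative only, guarded out of
 * compilation) of the batching pattern cifs_unlock_range() uses above:
 * accumulate ranges into a fixed-size wire buffer and flush whenever it
 * fills. The names below (struct lock_range, flush_batch, MAX_PER_REQ)
 * are hypothetical stand-ins; MAX_PER_REQ plays the role of max_num as
 * derived from maxBuf.
 */
#if 0
#define MAX_PER_REQ 16

struct lock_range { unsigned long long off, len; };

static int flush_batch(const struct lock_range *buf, unsigned int n)
{
        /* stand-in for cifs_lockv(): send n ranges in one request */
        return 0;
}

static int unlock_all(const struct lock_range *ranges, unsigned int count)
{
        struct lock_range buf[MAX_PER_REQ];
        unsigned int i, num = 0;
        int rc = 0, stored_rc;

        for (i = 0; i < count; i++) {
                buf[num++] = ranges[i];
                if (num == MAX_PER_REQ) {
                        /* buffer full - flush one wire request */
                        stored_rc = flush_batch(buf, num);
                        if (stored_rc)
                                rc = stored_rc;
                        num = 0;
                }
        }
        if (num) {
                /* flush the final partial batch */
                stored_rc = flush_batch(buf, num);
                if (stored_rc)
                        rc = stored_rc;
        }
        return rc;
}
#endif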
1523
1524 static int
1525 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1526            bool wait_flag, bool posix_lck, int lock, int unlock,
1527            unsigned int xid)
1528 {
1529         int rc = 0;
1530         __u64 length = 1 + flock->fl_end - flock->fl_start;
1531         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1532         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1533         struct TCP_Server_Info *server = tcon->ses->server;
1534         struct inode *inode = d_inode(cfile->dentry);
1535
1536         if (posix_lck) {
1537                 int posix_lock_type;
1538
1539                 rc = cifs_posix_lock_set(file, flock);
1540                 if (!rc || rc < 0)
1541                         return rc;
1542
1543                 if (type & server->vals->shared_lock_type)
1544                         posix_lock_type = CIFS_RDLCK;
1545                 else
1546                         posix_lock_type = CIFS_WRLCK;
1547
1548                 if (unlock == 1)
1549                         posix_lock_type = CIFS_UNLCK;
1550
1551                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1552                                       hash_lockowner(flock->fl_owner),
1553                                       flock->fl_start, length,
1554                                       NULL, posix_lock_type, wait_flag);
1555                 goto out;
1556         }
1557
1558         if (lock) {
1559                 struct cifsLockInfo *lock;
1560
1561                 lock = cifs_lock_init(flock->fl_start, length, type);
1562                 if (!lock)
1563                         return -ENOMEM;
1564
1565                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1566                 if (rc < 0) {
1567                         kfree(lock);
1568                         return rc;
1569                 }
1570                 if (!rc)
1571                         goto out;
1572
1573                 /*
1574                  * Windows 7 server can delay breaking lease from read to None
1575                  * if we set a byte-range lock on a file - break it explicitly
1576                  * before sending the lock to the server to be sure the next
1577                  * read won't conflict with non-overlapping locks due to
1578                  * page reading.
1579                  */
1580                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1581                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1582                         cifs_zap_mapping(inode);
1583                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1584                                  inode);
1585                         CIFS_I(inode)->oplock = 0;
1586                 }
1587
1588                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1589                                             type, 1, 0, wait_flag);
1590                 if (rc) {
1591                         kfree(lock);
1592                         return rc;
1593                 }
1594
1595                 cifs_lock_add(cfile, lock);
1596         } else if (unlock)
1597                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1598
1599 out:
1600         if (flock->fl_flags & FL_POSIX && !rc)
1601                 rc = locks_lock_file_wait(file, flock);
1602         return rc;
1603 }
1604
1605 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1606 {
1607         int rc, xid;
1608         int lock = 0, unlock = 0;
1609         bool wait_flag = false;
1610         bool posix_lck = false;
1611         struct cifs_sb_info *cifs_sb;
1612         struct cifs_tcon *tcon;
1613         struct cifsInodeInfo *cinode;
1614         struct cifsFileInfo *cfile;
1615         __u16 netfid;
1616         __u32 type;
1617
1618         rc = -EACCES;
1619         xid = get_xid();
1620
1621         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1622                  cmd, flock->fl_flags, flock->fl_type,
1623                  flock->fl_start, flock->fl_end);
1624
1625         cfile = (struct cifsFileInfo *)file->private_data;
1626         tcon = tlink_tcon(cfile->tlink);
1627
1628         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1629                         tcon->ses->server);
1630
1631         cifs_sb = CIFS_FILE_SB(file);
1632         netfid = cfile->fid.netfid;
1633         cinode = CIFS_I(file_inode(file));
1634
1635         if (cap_unix(tcon->ses) &&
1636             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1637             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1638                 posix_lck = true;
1639         /*
1640          * BB add code here to normalize offset and length to account for
1641          * negative length which we can not accept over the wire.
1642          */
1643         if (IS_GETLK(cmd)) {
1644                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1645                 free_xid(xid);
1646                 return rc;
1647         }
1648
1649         if (!lock && !unlock) {
1650                 /*
1651                  * if neither lock nor unlock was requested, there is nothing
1652                  * to do since we do not know what it is
1653                  */
1654                 free_xid(xid);
1655                 return -EOPNOTSUPP;
1656         }
1657
1658         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1659                         xid);
1660         free_xid(xid);
1661         return rc;
1662 }
1663
1664 /*
1665  * update the file size (if needed) after a write. Should be called with
1666  * the inode->i_lock held
1667  */
1668 void
1669 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1670                       unsigned int bytes_written)
1671 {
1672         loff_t end_of_write = offset + bytes_written;
1673
1674         if (end_of_write > cifsi->server_eof)
1675                 cifsi->server_eof = end_of_write;
1676 }
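/*
 * A sketch (hypothetical helper, not part of this driver) showing the
 * call pattern the comment above requires: cifs_update_eof() must run
 * under inode->i_lock, exactly as cifs_write() below does it.
 */
#if 0
static inline void cifs_update_eof_locked(struct cifsInodeInfo *cifsi,
                                          struct inode *inode, loff_t offset,
                                          unsigned int bytes_written)
{
        spin_lock(&inode->i_lock);      /* i_lock serializes server_eof updates */
        cifs_update_eof(cifsi, offset, bytes_written);
        spin_unlock(&inode->i_lock);
}
#endif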
1677
1678 static ssize_t
1679 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1680            size_t write_size, loff_t *offset)
1681 {
1682         int rc = 0;
1683         unsigned int bytes_written = 0;
1684         unsigned int total_written;
1685         struct cifs_sb_info *cifs_sb;
1686         struct cifs_tcon *tcon;
1687         struct TCP_Server_Info *server;
1688         unsigned int xid;
1689         struct dentry *dentry = open_file->dentry;
1690         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1691         struct cifs_io_parms io_parms;
1692
1693         cifs_sb = CIFS_SB(dentry->d_sb);
1694
1695         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1696                  write_size, *offset, dentry);
1697
1698         tcon = tlink_tcon(open_file->tlink);
1699         server = tcon->ses->server;
1700
1701         if (!server->ops->sync_write)
1702                 return -ENOSYS;
1703
1704         xid = get_xid();
1705
1706         for (total_written = 0; write_size > total_written;
1707              total_written += bytes_written) {
1708                 rc = -EAGAIN;
1709                 while (rc == -EAGAIN) {
1710                         struct kvec iov[2];
1711                         unsigned int len;
1712
1713                         if (open_file->invalidHandle) {
1714                                 /* we could deadlock if we called
1715                                    filemap_fdatawait from here so tell
1716                                    reopen_file not to flush data to
1717                                    server now */
1718                                 rc = cifs_reopen_file(open_file, false);
1719                                 if (rc != 0)
1720                                         break;
1721                         }
1722
1723                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1724                                   (unsigned int)write_size - total_written);
1725                         /* iov[0] is reserved for smb header */
1726                         iov[1].iov_base = (char *)write_data + total_written;
1727                         iov[1].iov_len = len;
1728                         io_parms.pid = pid;
1729                         io_parms.tcon = tcon;
1730                         io_parms.offset = *offset;
1731                         io_parms.length = len;
1732                         rc = server->ops->sync_write(xid, &open_file->fid,
1733                                         &io_parms, &bytes_written, iov, 1);
1734                 }
1735                 if (rc || (bytes_written == 0)) {
1736                         if (total_written)
1737                                 break;
1738                         else {
1739                                 free_xid(xid);
1740                                 return rc;
1741                         }
1742                 } else {
1743                         spin_lock(&d_inode(dentry)->i_lock);
1744                         cifs_update_eof(cifsi, *offset, bytes_written);
1745                         spin_unlock(&d_inode(dentry)->i_lock);
1746                         *offset += bytes_written;
1747                 }
1748         }
1749
1750         cifs_stats_bytes_written(tcon, total_written);
1751
1752         if (total_written > 0) {
1753                 spin_lock(&d_inode(dentry)->i_lock);
1754                 if (*offset > d_inode(dentry)->i_size)
1755                         i_size_write(d_inode(dentry), *offset);
1756                 spin_unlock(&d_inode(dentry)->i_lock);
1757         }
1758         mark_inode_dirty_sync(d_inode(dentry));
1759         free_xid(xid);
1760         return total_written;
1761 }
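/*
 * Illustration of the loop above (example numbers only): with
 * write_size = 100000 and a retry size of 65536, cifs_write() issues two
 * sync_write calls (65536 bytes, then 34464 bytes), advancing *offset
 * after each. An error after partial progress returns the bytes written
 * so far rather than the error code.
 */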
1762
1763 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1764                                         bool fsuid_only)
1765 {
1766         struct cifsFileInfo *open_file = NULL;
1767         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1768         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1769
1770         /* only filter by fsuid on multiuser mounts */
1771         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1772                 fsuid_only = false;
1773
1774         spin_lock(&tcon->open_file_lock);
1775         /* we could simply take the first list entry since write-only entries
1776            are always at the end of the list, but the first entry might have
1777            a close pending, so we walk the whole list */
1778         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1779                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1780                         continue;
1781                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1782                         if (!open_file->invalidHandle) {
1783                                 /* found a good file */
1784                                 /* lock it so it will not be closed on us */
1785                                 cifsFileInfo_get(open_file);
1786                                 spin_unlock(&tcon->open_file_lock);
1787                                 return open_file;
1788                         } /* else might as well continue, and look for
1789                              another, or simply have the caller reopen it
1790                              again rather than trying to fix this handle */
1791                 } else /* write only file */
1792                         break; /* write only files are last so must be done */
1793         }
1794         spin_unlock(&tcon->open_file_lock);
1795         return NULL;
1796 }
1797
1798 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1799                                         bool fsuid_only)
1800 {
1801         struct cifsFileInfo *open_file, *inv_file = NULL;
1802         struct cifs_sb_info *cifs_sb;
1803         struct cifs_tcon *tcon;
1804         bool any_available = false;
1805         int rc;
1806         unsigned int refind = 0;
1807
1808         /* Having a null inode here (because mapping->host was set to zero by
1809         the VFS or MM) should not happen but we had reports of an oops (due to
1810         it being zero) during stress test cases so we need to check for it */
1811
1812         if (cifs_inode == NULL) {
1813                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1814                 dump_stack();
1815                 return NULL;
1816         }
1817
1818         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1819         tcon = cifs_sb_master_tcon(cifs_sb);
1820
1821         /* only filter by fsuid on multiuser mounts */
1822         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1823                 fsuid_only = false;
1824
1825         spin_lock(&tcon->open_file_lock);
1826 refind_writable:
1827         if (refind > MAX_REOPEN_ATT) {
1828                 spin_unlock(&tcon->open_file_lock);
1829                 return NULL;
1830         }
1831         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1832                 if (!any_available && open_file->pid != current->tgid)
1833                         continue;
1834                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1835                         continue;
1836                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1837                         if (!open_file->invalidHandle) {
1838                                 /* found a good writable file */
1839                                 cifsFileInfo_get(open_file);
1840                                 spin_unlock(&tcon->open_file_lock);
1841                                 return open_file;
1842                         } else {
1843                                 if (!inv_file)
1844                                         inv_file = open_file;
1845                         }
1846                 }
1847         }
1848         /* couldn't find usable FH with same pid, try any available */
1849         if (!any_available) {
1850                 any_available = true;
1851                 goto refind_writable;
1852         }
1853
1854         if (inv_file) {
1855                 any_available = false;
1856                 cifsFileInfo_get(inv_file);
1857         }
1858
1859         spin_unlock(&tcon->open_file_lock);
1860
1861         if (inv_file) {
1862                 rc = cifs_reopen_file(inv_file, false);
1863                 if (!rc)
1864                         return inv_file;
1865                 else {
1866                         spin_lock(&tcon->open_file_lock);
1867                         list_move_tail(&inv_file->flist,
1868                                         &cifs_inode->openFileList);
1869                         spin_unlock(&tcon->open_file_lock);
1870                         cifsFileInfo_put(inv_file);
1871                         ++refind;
1872                         inv_file = NULL;
1873                         spin_lock(&tcon->open_file_lock);
1874                         goto refind_writable;
1875                 }
1876         }
1877
1878         return NULL;
1879 }
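/*
 * Note on the refind_writable loop above: it makes at most
 * MAX_REOPEN_ATT reopen attempts. Each failed cifs_reopen_file() moves
 * the stale handle to the tail of openFileList, drops its reference and
 * rescans, so a persistently broken handle cannot stall the search
 * forever.
 */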
1880
1881 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1882 {
1883         struct address_space *mapping = page->mapping;
1884         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1885         char *write_data;
1886         int rc = -EFAULT;
1887         int bytes_written = 0;
1888         struct inode *inode;
1889         struct cifsFileInfo *open_file;
1890
1891         if (!mapping || !mapping->host)
1892                 return -EFAULT;
1893
1894         inode = page->mapping->host;
1895
1896         offset += (loff_t)from;
1897         write_data = kmap(page);
1898         write_data += from;
1899
1900         if ((to > PAGE_SIZE) || (from > to)) {
1901                 kunmap(page);
1902                 return -EIO;
1903         }
1904
1905         /* racing with truncate? */
1906         if (offset > mapping->host->i_size) {
1907                 kunmap(page);
1908                 return 0; /* don't care */
1909         }
1910
1911         /* check to make sure that we are not extending the file */
1912         if (mapping->host->i_size - offset < (loff_t)to)
1913                 to = (unsigned)(mapping->host->i_size - offset);
1914
1915         open_file = find_writable_file(CIFS_I(mapping->host), false);
1916         if (open_file) {
1917                 bytes_written = cifs_write(open_file, open_file->pid,
1918                                            write_data, to - from, &offset);
1919                 cifsFileInfo_put(open_file);
1920                 /* Does mm or vfs already set times? */
1921                 inode->i_atime = inode->i_mtime = current_time(inode);
1922                 if ((bytes_written > 0) && (offset))
1923                         rc = 0;
1924                 else if (bytes_written < 0)
1925                         rc = bytes_written;
1926         } else {
1927                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1928                 rc = -EIO;
1929         }
1930
1931         kunmap(page);
1932         return rc;
1933 }
1934
1935 static struct cifs_writedata *
1936 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1937                           pgoff_t end, pgoff_t *index,
1938                           unsigned int *found_pages)
1939 {
1940         unsigned int nr_pages;
1941         struct page **pages;
1942         struct cifs_writedata *wdata;
1943
1944         wdata = cifs_writedata_alloc((unsigned int)tofind,
1945                                      cifs_writev_complete);
1946         if (!wdata)
1947                 return NULL;
1948
1949         /*
1950          * find_get_pages_tag seems to return a max of 256 on each
1951          * iteration, so we must call it several times in order to
1952          * fill the array or the wsize is effectively limited to
1953          * 256 * PAGE_SIZE.
1954          */
1955         *found_pages = 0;
1956         pages = wdata->pages;
1957         do {
1958                 nr_pages = find_get_pages_tag(mapping, index,
1959                                               PAGECACHE_TAG_DIRTY, tofind,
1960                                               pages);
1961                 *found_pages += nr_pages;
1962                 tofind -= nr_pages;
1963                 pages += nr_pages;
1964         } while (nr_pages && tofind && *index <= end);
1965
1966         return wdata;
1967 }
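/*
 * Illustration (example numbers only, assuming 4 KB pages): with a 4 MB
 * wsize, tofind is 1024 pages, so the loop above may need up to four
 * find_get_pages_tag() calls of at most 256 pages each to fill the
 * array.
 */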
1968
1969 static unsigned int
1970 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1971                     struct address_space *mapping,
1972                     struct writeback_control *wbc,
1973                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1974 {
1975         unsigned int nr_pages = 0, i;
1976         struct page *page;
1977
1978         for (i = 0; i < found_pages; i++) {
1979                 page = wdata->pages[i];
1980                 /*
1981                  * At this point we hold neither mapping->tree_lock nor
1982                  * lock on the page itself: the page may be truncated or
1983                  * invalidated (changing page->mapping to NULL), or even
1984                  * swizzled back from swapper_space to tmpfs file
1985                  * mapping
1986                  */
1987
1988                 if (nr_pages == 0)
1989                         lock_page(page);
1990                 else if (!trylock_page(page))
1991                         break;
1992
1993                 if (unlikely(page->mapping != mapping)) {
1994                         unlock_page(page);
1995                         break;
1996                 }
1997
1998                 if (!wbc->range_cyclic && page->index > end) {
1999                         *done = true;
2000                         unlock_page(page);
2001                         break;
2002                 }
2003
2004                 if (*next && (page->index != *next)) {
2005                         /* Not the next consecutive page */
2006                         unlock_page(page);
2007                         break;
2008                 }
2009
2010                 if (wbc->sync_mode != WB_SYNC_NONE)
2011                         wait_on_page_writeback(page);
2012
2013                 if (PageWriteback(page) ||
2014                                 !clear_page_dirty_for_io(page)) {
2015                         unlock_page(page);
2016                         break;
2017                 }
2018
2019                 /*
2020                  * This actually clears the dirty bit in the radix tree.
2021                  * See cifs_writepage() for more commentary.
2022                  */
2023                 set_page_writeback(page);
2024                 if (page_offset(page) >= i_size_read(mapping->host)) {
2025                         *done = true;
2026                         unlock_page(page);
2027                         end_page_writeback(page);
2028                         break;
2029                 }
2030
2031                 wdata->pages[i] = page;
2032                 *next = page->index + 1;
2033                 ++nr_pages;
2034         }
2035
2036         /* reset index to refind any pages skipped */
2037         if (nr_pages == 0)
2038                 *index = wdata->pages[0]->index + 1;
2039
2040         /* put any pages we aren't going to use */
2041         for (i = nr_pages; i < found_pages; i++) {
2042                 put_page(wdata->pages[i]);
2043                 wdata->pages[i] = NULL;
2044         }
2045
2046         return nr_pages;
2047 }
2048
2049 static int
2050 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2051                  struct address_space *mapping, struct writeback_control *wbc)
2052 {
2053         int rc = 0;
2054         struct TCP_Server_Info *server;
2055         unsigned int i;
2056
2057         wdata->sync_mode = wbc->sync_mode;
2058         wdata->nr_pages = nr_pages;
2059         wdata->offset = page_offset(wdata->pages[0]);
2060         wdata->pagesz = PAGE_SIZE;
2061         wdata->tailsz = min(i_size_read(mapping->host) -
2062                         page_offset(wdata->pages[nr_pages - 1]),
2063                         (loff_t)PAGE_SIZE);
2064         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2065
2066         if (wdata->cfile != NULL)
2067                 cifsFileInfo_put(wdata->cfile);
2068         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2069         if (!wdata->cfile) {
2070                 cifs_dbg(VFS, "No writable handles for inode\n");
2071                 rc = -EBADF;
2072         } else {
2073                 wdata->pid = wdata->cfile->pid;
2074                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2075                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2076         }
2077
2078         for (i = 0; i < nr_pages; ++i)
2079                 unlock_page(wdata->pages[i]);
2080
2081         return rc;
2082 }
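/*
 * Illustration of the tailsz arithmetic above (example numbers only,
 * assuming 4 KB pages): with nr_pages = 3 and EOF falling 1500 bytes
 * into the last page, tailsz = 1500 and
 * wdata->bytes = 2 * 4096 + 1500 = 9692.
 */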
2083
2084 static int cifs_writepages(struct address_space *mapping,
2085                            struct writeback_control *wbc)
2086 {
2087         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2088         struct TCP_Server_Info *server;
2089         bool done = false, scanned = false, range_whole = false;
2090         pgoff_t end, index;
2091         struct cifs_writedata *wdata;
2092         int rc = 0;
2093
2094         /*
2095          * If wsize is smaller than the page cache size, default to writing
2096          * one page at a time via cifs_writepage
2097          */
2098         if (cifs_sb->wsize < PAGE_SIZE)
2099                 return generic_writepages(mapping, wbc);
2100
2101         if (wbc->range_cyclic) {
2102                 index = mapping->writeback_index; /* Start from prev offset */
2103                 end = -1;
2104         } else {
2105                 index = wbc->range_start >> PAGE_SHIFT;
2106                 end = wbc->range_end >> PAGE_SHIFT;
2107                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2108                         range_whole = true;
2109                 scanned = true;
2110         }
2111         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2112 retry:
2113         while (!done && index <= end) {
2114                 unsigned int i, nr_pages, found_pages, wsize, credits;
2115                 pgoff_t next = 0, tofind, saved_index = index;
2116
2117                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2118                                                    &wsize, &credits);
2119                 if (rc)
2120                         break;
2121
2122                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2123
2124                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2125                                                   &found_pages);
2126                 if (!wdata) {
2127                         rc = -ENOMEM;
2128                         add_credits_and_wake_if(server, credits, 0);
2129                         break;
2130                 }
2131
2132                 if (found_pages == 0) {
2133                         kref_put(&wdata->refcount, cifs_writedata_release);
2134                         add_credits_and_wake_if(server, credits, 0);
2135                         break;
2136                 }
2137
2138                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2139                                                end, &index, &next, &done);
2140
2141                 /* nothing to write? */
2142                 if (nr_pages == 0) {
2143                         kref_put(&wdata->refcount, cifs_writedata_release);
2144                         add_credits_and_wake_if(server, credits, 0);
2145                         continue;
2146                 }
2147
2148                 wdata->credits = credits;
2149
2150                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2151
2152                 /* send failure -- clean up the mess */
2153                 if (rc != 0) {
2154                         add_credits_and_wake_if(server, wdata->credits, 0);
2155                         for (i = 0; i < nr_pages; ++i) {
2156                                 if (rc == -EAGAIN)
2157                                         redirty_page_for_writepage(wbc,
2158                                                            wdata->pages[i]);
2159                                 else
2160                                         SetPageError(wdata->pages[i]);
2161                                 end_page_writeback(wdata->pages[i]);
2162                                 put_page(wdata->pages[i]);
2163                         }
2164                         if (rc != -EAGAIN)
2165                                 mapping_set_error(mapping, rc);
2166                 }
2167                 kref_put(&wdata->refcount, cifs_writedata_release);
2168
2169                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2170                         index = saved_index;
2171                         continue;
2172                 }
2173
2174                 wbc->nr_to_write -= nr_pages;
2175                 if (wbc->nr_to_write <= 0)
2176                         done = true;
2177
2178                 index = next;
2179         }
2180
2181         if (!scanned && !done) {
2182                 /*
2183                  * We hit the last page and there is more work to be done: wrap
2184                  * back to the start of the file
2185                  */
2186                 scanned = true;
2187                 index = 0;
2188                 goto retry;
2189         }
2190
2191         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2192                 mapping->writeback_index = index;
2193
2194         return rc;
2195 }
2196
2197 static int
2198 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2199 {
2200         int rc;
2201         unsigned int xid;
2202
2203         xid = get_xid();
2204 /* BB add check for wbc flags */
2205         get_page(page);
2206         if (!PageUptodate(page))
2207                 cifs_dbg(FYI, "ppw - page not up to date\n");
2208
2209         /*
2210          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2211          *
2212          * A writepage() implementation always needs to do either this,
2213          * or re-dirty the page with "redirty_page_for_writepage()" in
2214          * the case of a failure.
2215          *
2216          * Just unlocking the page will cause the radix tree tag-bits
2217          * to fail to update with the state of the page correctly.
2218          */
2219         set_page_writeback(page);
2220 retry_write:
2221         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2222         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2223                 goto retry_write;
2224         else if (rc == -EAGAIN)
2225                 redirty_page_for_writepage(wbc, page);
2226         else if (rc != 0)
2227                 SetPageError(page);
2228         else
2229                 SetPageUptodate(page);
2230         end_page_writeback(page);
2231         put_page(page);
2232         free_xid(xid);
2233         return rc;
2234 }
2235
2236 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2237 {
2238         int rc = cifs_writepage_locked(page, wbc);
2239         unlock_page(page);
2240         return rc;
2241 }
2242
2243 static int cifs_write_end(struct file *file, struct address_space *mapping,
2244                         loff_t pos, unsigned len, unsigned copied,
2245                         struct page *page, void *fsdata)
2246 {
2247         int rc;
2248         struct inode *inode = mapping->host;
2249         struct cifsFileInfo *cfile = file->private_data;
2250         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2251         __u32 pid;
2252
2253         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2254                 pid = cfile->pid;
2255         else
2256                 pid = current->tgid;
2257
2258         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2259                  page, pos, copied);
2260
2261         if (PageChecked(page)) {
2262                 if (copied == len)
2263                         SetPageUptodate(page);
2264                 ClearPageChecked(page);
2265         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2266                 SetPageUptodate(page);
2267
2268         if (!PageUptodate(page)) {
2269                 char *page_data;
2270                 unsigned offset = pos & (PAGE_SIZE - 1);
2271                 unsigned int xid;
2272
2273                 xid = get_xid();
2274                 /* this is probably better than directly calling
2275                    partialpage_write since in this function the file handle is
2276                    known, which we might as well leverage */
2277                 /* BB check if anything else is missing from ppw,
2278                    such as updating last write time */
2279                 page_data = kmap(page);
2280                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2281                 /* if (rc < 0) should we set writebehind rc? */
2282                 kunmap(page);
2283
2284                 free_xid(xid);
2285         } else {
2286                 rc = copied;
2287                 pos += copied;
2288                 set_page_dirty(page);
2289         }
2290
2291         if (rc > 0) {
2292                 spin_lock(&inode->i_lock);
2293                 if (pos > inode->i_size)
2294                         i_size_write(inode, pos);
2295                 spin_unlock(&inode->i_lock);
2296         }
2297
2298         unlock_page(page);
2299         put_page(page);
2300
2301         return rc;
2302 }
2303
2304 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2305                       int datasync)
2306 {
2307         unsigned int xid;
2308         int rc = 0;
2309         struct cifs_tcon *tcon;
2310         struct TCP_Server_Info *server;
2311         struct cifsFileInfo *smbfile = file->private_data;
2312         struct inode *inode = file_inode(file);
2313         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2314
2315         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2316         if (rc)
2317                 return rc;
2318         inode_lock(inode);
2319
2320         xid = get_xid();
2321
2322         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2323                  file, datasync);
2324
2325         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2326                 rc = cifs_zap_mapping(inode);
2327                 if (rc) {
2328                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2329                         rc = 0; /* don't care about it in fsync */
2330                 }
2331         }
2332
2333         tcon = tlink_tcon(smbfile->tlink);
2334         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2335                 server = tcon->ses->server;
2336                 if (server->ops->flush)
2337                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2338                 else
2339                         rc = -ENOSYS;
2340         }
2341
2342         free_xid(xid);
2343         inode_unlock(inode);
2344         return rc;
2345 }
2346
2347 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2348 {
2349         unsigned int xid;
2350         int rc = 0;
2351         struct cifs_tcon *tcon;
2352         struct TCP_Server_Info *server;
2353         struct cifsFileInfo *smbfile = file->private_data;
2354         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2355         struct inode *inode = file->f_mapping->host;
2356
2357         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2358         if (rc)
2359                 return rc;
2360         inode_lock(inode);
2361
2362         xid = get_xid();
2363
2364         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2365                  file, datasync);
2366
2367         tcon = tlink_tcon(smbfile->tlink);
2368         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2369                 server = tcon->ses->server;
2370                 if (server->ops->flush)
2371                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2372                 else
2373                         rc = -ENOSYS;
2374         }
2375
2376         free_xid(xid);
2377         inode_unlock(inode);
2378         return rc;
2379 }
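/*
 * Note: cifs_fsync() differs from cifs_strict_fsync() above chiefly in
 * skipping the cifs_zap_mapping() invalidation step that the strict
 * variant performs when the inode has no read oplock (CIFS_CACHE_READ).
 */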
2380
2381 /*
2382  * As file closes, flush all cached write data for this inode checking
2383  * for write behind errors.
2384  */
2385 int cifs_flush(struct file *file, fl_owner_t id)
2386 {
2387         struct inode *inode = file_inode(file);
2388         int rc = 0;
2389
2390         if (file->f_mode & FMODE_WRITE)
2391                 rc = filemap_write_and_wait(inode->i_mapping);
2392
2393         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2394
2395         return rc;
2396 }
2397
2398 static int
2399 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2400 {
2401         int rc = 0;
2402         unsigned long i;
2403
2404         for (i = 0; i < num_pages; i++) {
2405                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2406                 if (!pages[i]) {
2407                         /*
2408                          * save number of pages we have already allocated and
2409                          * return with ENOMEM error
2410                          */
2411                         num_pages = i;
2412                         rc = -ENOMEM;
2413                         break;
2414                 }
2415         }
2416
2417         if (rc) {
2418                 for (i = 0; i < num_pages; i++)
2419                         put_page(pages[i]);
2420         }
2421         return rc;
2422 }
2423
2424 static inline
2425 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2426 {
2427         size_t num_pages;
2428         size_t clen;
2429
2430         clen = min_t(const size_t, len, wsize);
2431         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2432
2433         if (cur_len)
2434                 *cur_len = clen;
2435
2436         return num_pages;
2437 }
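/*
 * Example (hypothetical values, assuming 4 KB pages): with
 * wsize = 65536 and len = 10000, get_numpages() clamps *cur_len to
 * 10000 and returns DIV_ROUND_UP(10000, 4096) = 3 pages.
 */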
2438
2439 static void
2440 cifs_uncached_writedata_release(struct kref *refcount)
2441 {
2442         int i;
2443         struct cifs_writedata *wdata = container_of(refcount,
2444                                         struct cifs_writedata, refcount);
2445
2446         for (i = 0; i < wdata->nr_pages; i++)
2447                 put_page(wdata->pages[i]);
2448         cifs_writedata_release(refcount);
2449 }
2450
2451 static void
2452 cifs_uncached_writev_complete(struct work_struct *work)
2453 {
2454         struct cifs_writedata *wdata = container_of(work,
2455                                         struct cifs_writedata, work);
2456         struct inode *inode = d_inode(wdata->cfile->dentry);
2457         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2458
2459         spin_lock(&inode->i_lock);
2460         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2461         if (cifsi->server_eof > inode->i_size)
2462                 i_size_write(inode, cifsi->server_eof);
2463         spin_unlock(&inode->i_lock);
2464
2465         complete(&wdata->done);
2466
2467         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2468 }
2469
2470 static int
2471 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2472                       size_t *len, unsigned long *num_pages)
2473 {
2474         size_t save_len, copied, bytes, cur_len = *len;
2475         unsigned long i, nr_pages = *num_pages;
2476
2477         save_len = cur_len;
2478         for (i = 0; i < nr_pages; i++) {
2479                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2480                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2481                 cur_len -= copied;
2482                 /*
2483                  * If we didn't copy as much as we expected, then that
2484                  * may mean we trod into an unmapped area. Stop copying
2485                  * at that point. On the next pass through the big
2486                  * loop, we'll likely end up getting a zero-length
2487                  * write and bailing out of it.
2488                  */
2489                 if (copied < bytes)
2490                         break;
2491         }
2492         cur_len = save_len - cur_len;
2493         *len = cur_len;
2494
2495         /*
2496          * If we have no data to send, then that probably means that
2497          * the copy above failed altogether. That's most likely because
2498          * the address in the iovec was bogus. Return -EFAULT and let
2499          * the caller free anything we allocated and bail out.
2500          */
2501         if (!cur_len)
2502                 return -EFAULT;
2503
2504         /*
2505          * i + 1 now represents the number of pages we actually used in
2506          * the copy phase above.
2507          */
2508         *num_pages = i + 1;
2509         return 0;
2510 }
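/*
 * Illustration of the short-copy handling above (example numbers only,
 * assuming 4 KB pages): copying cur_len = 9000 into three pages
 * normally yields *len = 9000 and *num_pages = 3. If the second page
 * accepts only 1000 of its 4096 bytes (an unmapped source address), the
 * loop stops with *len = 4096 + 1000 = 5096 and *num_pages = 2.
 */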
2511
2512 static int
2513 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2514                      struct cifsFileInfo *open_file,
2515                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2516 {
2517         int rc = 0;
2518         size_t cur_len;
2519         unsigned long nr_pages, num_pages, i;
2520         struct cifs_writedata *wdata;
2521         struct iov_iter saved_from = *from;
2522         loff_t saved_offset = offset;
2523         pid_t pid;
2524         struct TCP_Server_Info *server;
2525
2526         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2527                 pid = open_file->pid;
2528         else
2529                 pid = current->tgid;
2530
2531         server = tlink_tcon(open_file->tlink)->ses->server;
2532
2533         do {
2534                 unsigned int wsize, credits;
2535
2536                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2537                                                    &wsize, &credits);
2538                 if (rc)
2539                         break;
2540
2541                 nr_pages = get_numpages(wsize, len, &cur_len);
2542                 wdata = cifs_writedata_alloc(nr_pages,
2543                                              cifs_uncached_writev_complete);
2544                 if (!wdata) {
2545                         rc = -ENOMEM;
2546                         add_credits_and_wake_if(server, credits, 0);
2547                         break;
2548                 }
2549
2550                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2551                 if (rc) {
2552                         kfree(wdata);
2553                         add_credits_and_wake_if(server, credits, 0);
2554                         break;
2555                 }
2556
2557                 num_pages = nr_pages;
2558                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2559                 if (rc) {
2560                         for (i = 0; i < nr_pages; i++)
2561                                 put_page(wdata->pages[i]);
2562                         kfree(wdata);
2563                         add_credits_and_wake_if(server, credits, 0);
2564                         break;
2565                 }
2566
2567                 /*
2568                  * Bring nr_pages down to the number of pages we actually used,
2569                  * and free any pages that we didn't use.
2570                  */
2571                 for ( ; nr_pages > num_pages; nr_pages--)
2572                         put_page(wdata->pages[nr_pages - 1]);
2573
2574                 wdata->sync_mode = WB_SYNC_ALL;
2575                 wdata->nr_pages = nr_pages;
2576                 wdata->offset = (__u64)offset;
2577                 wdata->cfile = cifsFileInfo_get(open_file);
2578                 wdata->pid = pid;
2579                 wdata->bytes = cur_len;
2580                 wdata->pagesz = PAGE_SIZE;
2581                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2582                 wdata->credits = credits;
2583
2584                 if (!wdata->cfile->invalidHandle ||
2585                     !cifs_reopen_file(wdata->cfile, false))
2586                         rc = server->ops->async_writev(wdata,
2587                                         cifs_uncached_writedata_release);
2588                 if (rc) {
2589                         add_credits_and_wake_if(server, wdata->credits, 0);
2590                         kref_put(&wdata->refcount,
2591                                  cifs_uncached_writedata_release);
2592                         if (rc == -EAGAIN) {
2593                                 *from = saved_from;
2594                                 iov_iter_advance(from, offset - saved_offset);
2595                                 continue;
2596                         }
2597                         break;
2598                 }
2599
2600                 list_add_tail(&wdata->list, wdata_list);
2601                 offset += cur_len;
2602                 len -= cur_len;
2603         } while (len > 0);
2604
2605         return rc;
2606 }
2607
2608 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2609 {
2610         struct file *file = iocb->ki_filp;
2611         ssize_t total_written = 0;
2612         struct cifsFileInfo *open_file;
2613         struct cifs_tcon *tcon;
2614         struct cifs_sb_info *cifs_sb;
2615         struct cifs_writedata *wdata, *tmp;
2616         struct list_head wdata_list;
2617         struct iov_iter saved_from = *from;
2618         int rc;
2619
2620         /*
2621          * BB - optimize the path when signing is disabled. We can drop this
2622          * extra memory-to-memory copying and use iovec buffers to construct
2623          * the write request.
2624          */
2625
2626         rc = generic_write_checks(iocb, from);
2627         if (rc <= 0)
2628                 return rc;
2629
2630         INIT_LIST_HEAD(&wdata_list);
2631         cifs_sb = CIFS_FILE_SB(file);
2632         open_file = file->private_data;
2633         tcon = tlink_tcon(open_file->tlink);
2634
2635         if (!tcon->ses->server->ops->async_writev)
2636                 return -ENOSYS;
2637
2638         rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2639                                   open_file, cifs_sb, &wdata_list);
2640
2641         /*
2642          * If at least one write was successfully sent, then discard any rc
2643          * value from the later writes. If the remaining writes succeed, then
2644          * we'll end up returning whatever was written. If one fails, then
2645          * we'll get a new rc value from that.
2646          */
2647         if (!list_empty(&wdata_list))
2648                 rc = 0;
2649
2650         /*
2651          * Wait for and collect replies for any successful sends in order of
2652          * increasing offset. Once an error is hit or we get a fatal signal
2653          * while waiting, then return without waiting for any more replies.
2654          */
2655 restart_loop:
2656         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2657                 if (!rc) {
2658                         /* FIXME: freezable too? */
2659                         rc = wait_for_completion_killable(&wdata->done);
2660                         if (rc)
2661                                 rc = -EINTR;
2662                         else if (wdata->result)
2663                                 rc = wdata->result;
2664                         else
2665                                 total_written += wdata->bytes;
2666
2667                         /* resend call if it's a retryable error */
2668                         if (rc == -EAGAIN) {
2669                                 struct list_head tmp_list;
2670                                 struct iov_iter tmp_from = saved_from;
2671
2672                                 INIT_LIST_HEAD(&tmp_list);
2673                                 list_del_init(&wdata->list);
2674
2675                                 iov_iter_advance(&tmp_from,
2676                                                  wdata->offset - iocb->ki_pos);
2677
2678                                 rc = cifs_write_from_iter(wdata->offset,
2679                                                 wdata->bytes, &tmp_from,
2680                                                 open_file, cifs_sb, &tmp_list);
2681
2682                                 list_splice(&tmp_list, &wdata_list);
2683
2684                                 kref_put(&wdata->refcount,
2685                                          cifs_uncached_writedata_release);
2686                                 goto restart_loop;
2687                         }
2688                 }
2689                 list_del_init(&wdata->list);
2690                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2691         }
2692
2693         if (unlikely(!total_written))
2694                 return rc;
2695
2696         iocb->ki_pos += total_written;
2697         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2698         cifs_stats_bytes_written(tcon, total_written);
2699         return total_written;
2700 }
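/*
 * Note on the resend path above: on -EAGAIN the iterator is rebuilt
 * from saved_from and advanced by (wdata->offset - iocb->ki_pos), so
 * cifs_write_from_iter() retransmits exactly the failed subrange; the
 * fresh wdata structures are spliced back into wdata_list and the
 * collection loop restarts.
 */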
2701
2702 static ssize_t
2703 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2704 {
2705         struct file *file = iocb->ki_filp;
2706         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2707         struct inode *inode = file->f_mapping->host;
2708         struct cifsInodeInfo *cinode = CIFS_I(inode);
2709         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2710         ssize_t rc;
2711
2712         /*
2713          * We need to hold the sem to be sure nobody modifies lock list
2714          * with a brlock that prevents writing.
2715          */
2716         down_read(&cinode->lock_sem);
2717         inode_lock(inode);
2718
2719         rc = generic_write_checks(iocb, from);
2720         if (rc <= 0)
2721                 goto out;
2722
2723         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2724                                      server->vals->exclusive_lock_type, NULL,
2725                                      CIFS_WRITE_OP))
2726                 rc = __generic_file_write_iter(iocb, from);
2727         else
2728                 rc = -EACCES;
2729 out:
2730         inode_unlock(inode);
2731
2732         if (rc > 0)
2733                 rc = generic_write_sync(iocb, rc);
2734         up_read(&cinode->lock_sem);
2735         return rc;
2736 }
2737
2738 ssize_t
2739 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2740 {
2741         struct inode *inode = file_inode(iocb->ki_filp);
2742         struct cifsInodeInfo *cinode = CIFS_I(inode);
2743         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2744         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2745                                                 iocb->ki_filp->private_data;
2746         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2747         ssize_t written;
2748
2749         written = cifs_get_writer(cinode);
2750         if (written)
2751                 return written;
2752
2753         if (CIFS_CACHE_WRITE(cinode)) {
2754                 if (cap_unix(tcon->ses) &&
2755                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2756                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2757                         written = generic_file_write_iter(iocb, from);
2758                         goto out;
2759                 }
2760                 written = cifs_writev(iocb, from);
2761                 goto out;
2762         }
2763         /*
2764          * For non-oplocked files in strict cache mode we need to write the data
2765          * to the server exactly from pos to pos+len-1 rather than flush all
2766          * affected pages because it may cause an error with mandatory locks on
2767          * these pages but not on the region from pos to pos+len-1.
2768          */
2769         written = cifs_user_writev(iocb, from);
2770         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2771                 /*
2772                  * Windows 7 server can delay breaking level2 oplock if a write
2773                  * request comes - break it on the client to prevent reading
2774                  * stale data.
2775                  */
2776                 cifs_zap_mapping(inode);
2777                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2778                          inode);
2779                 cinode->oplock = 0;
2780         }
2781 out:
2782         cifs_put_writer(cinode);
2783         return written;
2784 }
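/*
 * Summary of the dispatch in cifs_strict_writev() above:
 *
 *      oplock state        POSIX brlock capable    path taken
 *      CIFS_CACHE_WRITE    yes                     generic_file_write_iter()
 *      CIFS_CACHE_WRITE    no                      cifs_writev()
 *      no write oplock     -                       cifs_user_writev() (uncached)
 */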
2785
2786 static struct cifs_readdata *
2787 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2788 {
2789         struct cifs_readdata *rdata;
2790
2791         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2792                         GFP_KERNEL);
2793         if (rdata != NULL) {
2794                 kref_init(&rdata->refcount);
2795                 INIT_LIST_HEAD(&rdata->list);
2796                 init_completion(&rdata->done);
2797                 INIT_WORK(&rdata->work, complete);
2798         }
2799
2800         return rdata;
2801 }
2802
2803 void
2804 cifs_readdata_release(struct kref *refcount)
2805 {
2806         struct cifs_readdata *rdata = container_of(refcount,
2807                                         struct cifs_readdata, refcount);
2808
2809         if (rdata->cfile)
2810                 cifsFileInfo_put(rdata->cfile);
2811
2812         kfree(rdata);
2813 }
2814
2815 static int
2816 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2817 {
2818         int rc = 0;
2819         struct page *page;
2820         unsigned int i;
2821
2822         for (i = 0; i < nr_pages; i++) {
2823                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2824                 if (!page) {
2825                         rc = -ENOMEM;
2826                         break;
2827                 }
2828                 rdata->pages[i] = page;
2829         }
2830
2831         if (rc) {       /* unwind only the pages allocated so far */
2832                 while (i--) {
2833                         put_page(rdata->pages[i]);
2834                         rdata->pages[i] = NULL;
2835                 }
2836         }
2837         return rc;
2838 }
2839
2840 static void
2841 cifs_uncached_readdata_release(struct kref *refcount)
2842 {
2843         struct cifs_readdata *rdata = container_of(refcount,
2844                                         struct cifs_readdata, refcount);
2845         unsigned int i;
2846
2847         for (i = 0; i < rdata->nr_pages; i++) {
2848                 put_page(rdata->pages[i]);
2849                 rdata->pages[i] = NULL;
2850         }
2851         cifs_readdata_release(refcount);
2852 }
2853
2854 /**
2855  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2856  * @rdata:      the readdata response with list of pages holding data
2857  * @iter:       destination for our data
2858  *
2859  * This function copies data from a list of pages in a readdata response into
2860  * an array of iovecs. It will first calculate where the data should go
2861  * based on the info in the readdata and then copy the data into that spot.
2862  */
2863 static int
2864 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2865 {
2866         size_t remaining = rdata->got_bytes;
2867         unsigned int i;
2868
2869         for (i = 0; i < rdata->nr_pages; i++) {
2870                 struct page *page = rdata->pages[i];
2871                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2872                 size_t written = copy_page_to_iter(page, 0, copy, iter);
2873                 remaining -= written;
2874                 if (written < copy && iov_iter_count(iter) > 0)
2875                         break;
2876         }
2877         return remaining ? -EFAULT : 0;
2878 }
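
/*
 * Worked example for cifs_readdata_to_iov() (editorial note): with
 * got_bytes == 6000 and PAGE_SIZE == 4096, page 0 contributes 4096 bytes
 * and page 1 the remaining 1904. If copy_page_to_iter() copies less than
 * requested while the iterator still has room, the destination faulted,
 * and the leftover "remaining" count is reported as -EFAULT.
 */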
2879
2880 static void
2881 cifs_uncached_readv_complete(struct work_struct *work)
2882 {
2883         struct cifs_readdata *rdata = container_of(work,
2884                                                 struct cifs_readdata, work);
2885
2886         complete(&rdata->done);
2887         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2888 }
2889
2890 static int
2891 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2892                         struct cifs_readdata *rdata, unsigned int len)
2893 {
2894         int result = 0;
2895         unsigned int i;
2896         unsigned int nr_pages = rdata->nr_pages;
2897
2898         rdata->got_bytes = 0;
2899         rdata->tailsz = PAGE_SIZE;
2900         for (i = 0; i < nr_pages; i++) {
2901                 struct page *page = rdata->pages[i];
2902                 size_t n;
2903
2904                 if (len == 0) {
2905                         /* no need to hold page hostage */
2906                         rdata->pages[i] = NULL;
2907                         rdata->nr_pages--;
2908                         put_page(page);
2909                         continue;
2910                 }
2911                 n = len;
2912                 if (len >= PAGE_SIZE) {
2913                         /* enough data to fill the page */
2914                         n = PAGE_SIZE;
2915                         len -= n;
2916                 } else {
2917                         zero_user(page, len, PAGE_SIZE - len);
2918                         rdata->tailsz = len;
2919                         len = 0;
2920                 }
2921                 result = cifs_read_page_from_socket(server, page, n);
2922                 if (result < 0)
2923                         break;
2924
2925                 rdata->got_bytes += result;
2926         }
2927
2928         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2929                                                 rdata->got_bytes : result;
2930 }
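
/*
 * Worked example for the tail handling above (editorial note): with
 * PAGE_SIZE == 4096 and len == 5000, page 0 receives a full 4096 bytes;
 * page 1 receives the remaining 904 bytes, zero_user() clears its tail,
 * and rdata->tailsz becomes 904. Any further pages see len == 0 and are
 * released rather than held hostage.
 */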
2931
2932 static int
2933 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2934                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2935 {
2936         struct cifs_readdata *rdata;
2937         unsigned int npages, rsize, credits;
2938         size_t cur_len;
2939         int rc;
2940         pid_t pid;
2941         struct TCP_Server_Info *server;
2942
2943         server = tlink_tcon(open_file->tlink)->ses->server;
2944
2945         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2946                 pid = open_file->pid;
2947         else
2948                 pid = current->tgid;
2949
2950         do {
2951                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2952                                                    &rsize, &credits);
2953                 if (rc)
2954                         break;
2955
2956                 cur_len = min_t(const size_t, len, rsize);
2957                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2958
2959                 /* allocate a readdata struct */
2960                 rdata = cifs_readdata_alloc(npages,
2961                                             cifs_uncached_readv_complete);
2962                 if (!rdata) {
2963                         add_credits_and_wake_if(server, credits, 0);
2964                         rc = -ENOMEM;
2965                         break;
2966                 }
2967
2968                 rc = cifs_read_allocate_pages(rdata, npages);
2969                 if (rc)
2970                         goto error;
2971
2972                 rdata->cfile = cifsFileInfo_get(open_file);
2973                 rdata->nr_pages = npages;
2974                 rdata->offset = offset;
2975                 rdata->bytes = cur_len;
2976                 rdata->pid = pid;
2977                 rdata->pagesz = PAGE_SIZE;
2978                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2979                 rdata->credits = credits;
2980
2981                 if (!rdata->cfile->invalidHandle ||
2982                     !cifs_reopen_file(rdata->cfile, true))
2983                         rc = server->ops->async_readv(rdata);
2984 error:
2985                 if (rc) {
2986                         add_credits_and_wake_if(server, rdata->credits, 0);
2987                         kref_put(&rdata->refcount,
2988                                  cifs_uncached_readdata_release);
2989                         if (rc == -EAGAIN)
2990                                 continue;
2991                         break;
2992                 }
2993
2994                 list_add_tail(&rdata->list, rdata_list);
2995                 offset += cur_len;
2996                 len -= cur_len;
2997         } while (len > 0);
2998
2999         return rc;
3000 }
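
/*
 * Illustrative numbers for the request splitting above (editorial note):
 * a 100 KiB read against a negotiated rsize of 16 KiB is issued as six
 * 16 KiB rdatas plus one 4 KiB rdata, each backed by
 * DIV_ROUND_UP(cur_len, PAGE_SIZE) freshly allocated pages. Credits are
 * charged per request and handed back via add_credits_and_wake_if() when
 * a request cannot be sent.
 */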
3001
3002 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3003 {
3004         struct file *file = iocb->ki_filp;
3005         ssize_t rc;
3006         size_t len;
3007         ssize_t total_read = 0;
3008         loff_t offset = iocb->ki_pos;
3009         struct cifs_sb_info *cifs_sb;
3010         struct cifs_tcon *tcon;
3011         struct cifsFileInfo *open_file;
3012         struct cifs_readdata *rdata, *tmp;
3013         struct list_head rdata_list;
3014
3015         len = iov_iter_count(to);
3016         if (!len)
3017                 return 0;
3018
3019         INIT_LIST_HEAD(&rdata_list);
3020         cifs_sb = CIFS_FILE_SB(file);
3021         open_file = file->private_data;
3022         tcon = tlink_tcon(open_file->tlink);
3023
3024         if (!tcon->ses->server->ops->async_readv)
3025                 return -ENOSYS;
3026
3027         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3028                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3029
3030         rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3031
3032         /* if at least one read request was sent successfully, reset rc */
3033         if (!list_empty(&rdata_list))
3034                 rc = 0;
3035
3036         len = iov_iter_count(to);
3037         /* the loop below should proceed in the order of increasing offsets */
3038 again:
3039         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3040                 if (!rc) {
3041                         /* FIXME: freezable sleep too? */
3042                         rc = wait_for_completion_killable(&rdata->done);
3043                         if (rc)
3044                                 rc = -EINTR;
3045                         else if (rdata->result == -EAGAIN) {
3046                                 /* resend call if it's a retryable error */
3047                                 struct list_head tmp_list;
3048                                 unsigned int got_bytes = rdata->got_bytes;
3049
3050                                 list_del_init(&rdata->list);
3051                                 INIT_LIST_HEAD(&tmp_list);
3052
3053                                 /*
3054                                  * Got a part of data and then reconnect has
3055                                  * happened -- fill the buffer and continue
3056                                  * reading.
3057                                  */
3058                                 if (got_bytes && got_bytes < rdata->bytes) {
3059                                         rc = cifs_readdata_to_iov(rdata, to);
3060                                         if (rc) {
3061                                                 kref_put(&rdata->refcount,
3062                                                 cifs_uncached_readdata_release);
3063                                                 continue;
3064                                         }
3065                                 }
3066
3067                                 rc = cifs_send_async_read(
3068                                                 rdata->offset + got_bytes,
3069                                                 rdata->bytes - got_bytes,
3070                                                 rdata->cfile, cifs_sb,
3071                                                 &tmp_list);
3072
3073                                 list_splice(&tmp_list, &rdata_list);
3074
3075                                 kref_put(&rdata->refcount,
3076                                          cifs_uncached_readdata_release);
3077                                 goto again;
3078                         } else if (rdata->result)
3079                                 rc = rdata->result;
3080                         else
3081                                 rc = cifs_readdata_to_iov(rdata, to);
3082
3083                         /* if there was a short read -- discard anything left */
3084                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3085                                 rc = -ENODATA;
3086                 }
3087                 list_del_init(&rdata->list);
3088                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3089         }
3090
3091         total_read = len - iov_iter_count(to);
3092
3093         cifs_stats_bytes_read(tcon, total_read);
3094
3095         /* mask nodata case */
3096         if (rc == -ENODATA)
3097                 rc = 0;
3098
3099         if (total_read) {
3100                 iocb->ki_pos += total_read;
3101                 return total_read;
3102         }
3103         return rc;
3104 }
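
/*
 * Example of the -EAGAIN resend path above (editorial note): an rdata for
 * offset 0 and bytes 16384 that received 4096 bytes before a reconnect
 * first copies those 4096 bytes to the caller's iovec, then queues a new
 * request for offset 4096 and length 12288 and splices it back into
 * rdata_list, so the loop restarts without losing the partial data.
 */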
3105
3106 ssize_t
3107 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3108 {
3109         struct inode *inode = file_inode(iocb->ki_filp);
3110         struct cifsInodeInfo *cinode = CIFS_I(inode);
3111         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3112         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3113                                                 iocb->ki_filp->private_data;
3114         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3115         int rc = -EACCES;
3116
3117         /*
3118          * In strict cache mode we always need to read from the server if we
3119          * don't have a level II oplock, because the server can delay the
3120          * mtime change - so we can't decide whether to invalidate the inode.
3121          * Reading cached pages can also fail if there are mandatory locks
3122          * on pages affected by this read but not on the region from pos to
3123          * pos+len-1.
3124          */
3125         if (!CIFS_CACHE_READ(cinode))
3126                 return cifs_user_readv(iocb, to);
3127
3128         if (cap_unix(tcon->ses) &&
3129             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3130             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3131                 return generic_file_read_iter(iocb, to);
3132
3133         /*
3134          * We need to hold the sem to be sure nobody modifies lock list
3135          * with a brlock that prevents reading.
3136          */
3137         down_read(&cinode->lock_sem);
3138         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3139                                      tcon->ses->server->vals->shared_lock_type,
3140                                      NULL, CIFS_READ_OP))
3141                 rc = generic_file_read_iter(iocb, to);
3142         up_read(&cinode->lock_sem);
3143         return rc;
3144 }
3145
3146 static ssize_t
3147 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3148 {
3149         int rc = -EACCES;
3150         unsigned int bytes_read = 0;
3151         unsigned int total_read;
3152         unsigned int current_read_size;
3153         unsigned int rsize;
3154         struct cifs_sb_info *cifs_sb;
3155         struct cifs_tcon *tcon;
3156         struct TCP_Server_Info *server;
3157         unsigned int xid;
3158         char *cur_offset;
3159         struct cifsFileInfo *open_file;
3160         struct cifs_io_parms io_parms;
3161         int buf_type = CIFS_NO_BUFFER;
3162         __u32 pid;
3163
3164         xid = get_xid();
3165         cifs_sb = CIFS_FILE_SB(file);
3166
3167         /* FIXME: set up handlers for larger reads and/or convert to async */
3168         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3169
3170         if (file->private_data == NULL) {
3171                 rc = -EBADF;
3172                 free_xid(xid);
3173                 return rc;
3174         }
3175         open_file = file->private_data;
3176         tcon = tlink_tcon(open_file->tlink);
3177         server = tcon->ses->server;
3178
3179         if (!server->ops->sync_read) {
3180                 free_xid(xid);
3181                 return -ENOSYS;
3182         }
3183
3184         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3185                 pid = open_file->pid;
3186         else
3187                 pid = current->tgid;
3188
3189         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3190                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3191
3192         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3193              total_read += bytes_read, cur_offset += bytes_read) {
3194                 do {
3195                         current_read_size = min_t(uint, read_size - total_read,
3196                                                   rsize);
3197                         /*
3198                          * For Windows ME and 9x we do not want to request
3199                          * more than the server negotiated, since it will
3200                          * refuse the read otherwise.
3201                          */
3202                         if ((tcon->ses) && !(tcon->ses->capabilities &
3203                                 tcon->ses->server->vals->cap_large_files)) {
3204                                 current_read_size = min_t(uint,
3205                                         current_read_size, CIFSMaxBufSize);
3206                         }
3207                         if (open_file->invalidHandle) {
3208                                 rc = cifs_reopen_file(open_file, true);
3209                                 if (rc != 0)
3210                                         break;
3211                         }
3212                         io_parms.pid = pid;
3213                         io_parms.tcon = tcon;
3214                         io_parms.offset = *offset;
3215                         io_parms.length = current_read_size;
3216                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3217                                                     &bytes_read, &cur_offset,
3218                                                     &buf_type);
3219                 } while (rc == -EAGAIN);
3220
3221                 if (rc || (bytes_read == 0)) {
3222                         if (total_read) {
3223                                 break;
3224                         } else {
3225                                 free_xid(xid);
3226                                 return rc;
3227                         }
3228                 } else {
3229                         cifs_stats_bytes_read(tcon, total_read);
3230                         *offset += bytes_read;
3231                 }
3232         }
3233         free_xid(xid);
3234         return total_read;
3235 }
3236
3237 /*
3238  * If the page is mmap'ed into a process' page tables, then we need to make
3239  * sure that it doesn't change while being written back.
3240  */
3241 static int
3242 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3243 {
3244         struct page *page = vmf->page;
3245
3246         lock_page(page);
3247         return VM_FAULT_LOCKED;
3248 }
3249
3250 static const struct vm_operations_struct cifs_file_vm_ops = {
3251         .fault = filemap_fault,
3252         .map_pages = filemap_map_pages,
3253         .page_mkwrite = cifs_page_mkwrite,
3254 };
3255
3256 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3257 {
3258         int rc, xid;
3259         struct inode *inode = file_inode(file);
3260
3261         xid = get_xid();
3262
3263         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3264                 rc = cifs_zap_mapping(inode);
3265                 if (rc) {
3266                         free_xid(xid);  /* don't leak the xid on error */
3267                         return rc;
3268                 }
3269         }
3268
3269         rc = generic_file_mmap(file, vma);
3270         if (rc == 0)
3271                 vma->vm_ops = &cifs_file_vm_ops;
3272         free_xid(xid);
3273         return rc;
3274 }
3275
3276 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3277 {
3278         int rc, xid;
3279
3280         xid = get_xid();
3281         rc = cifs_revalidate_file(file);
3282         if (rc) {
3283                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3284                          rc);
3285                 free_xid(xid);
3286                 return rc;
3287         }
3288         rc = generic_file_mmap(file, vma);
3289         if (rc == 0)
3290                 vma->vm_ops = &cifs_file_vm_ops;
3291         free_xid(xid);
3292         return rc;
3293 }
3294
3295 static void
3296 cifs_readv_complete(struct work_struct *work)
3297 {
3298         unsigned int i, got_bytes;
3299         struct cifs_readdata *rdata = container_of(work,
3300                                                 struct cifs_readdata, work);
3301
3302         got_bytes = rdata->got_bytes;
3303         for (i = 0; i < rdata->nr_pages; i++) {
3304                 struct page *page = rdata->pages[i];
3305
3306                 lru_cache_add_file(page);
3307
3308                 if (rdata->result == 0 ||
3309                     (rdata->result == -EAGAIN && got_bytes)) {
3310                         flush_dcache_page(page);
3311                         SetPageUptodate(page);
3312                 }
3313
3314                 unlock_page(page);
3315
3316                 if (rdata->result == 0 ||
3317                     (rdata->result == -EAGAIN && got_bytes))
3318                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3319
3320                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3321
3322                 put_page(page);
3323                 rdata->pages[i] = NULL;
3324         }
3325         kref_put(&rdata->refcount, cifs_readdata_release);
3326 }
3327
3328 static int
3329 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3330                         struct cifs_readdata *rdata, unsigned int len)
3331 {
3332         int result = 0;
3333         unsigned int i;
3334         u64 eof;
3335         pgoff_t eof_index;
3336         unsigned int nr_pages = rdata->nr_pages;
3337
3338         /* determine the eof that the server (probably) has */
3339         eof = CIFS_I(rdata->mapping->host)->server_eof;
3340         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3341         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3342
3343         rdata->got_bytes = 0;
3344         rdata->tailsz = PAGE_SIZE;
3345         for (i = 0; i < nr_pages; i++) {
3346                 struct page *page = rdata->pages[i];
3347                 size_t n = PAGE_SIZE;
3348
3349                 if (len >= PAGE_SIZE) {
3350                         len -= PAGE_SIZE;
3351                 } else if (len > 0) {
3352                         /* enough for partial page, fill and zero the rest */
3353                         zero_user(page, len, PAGE_SIZE - len);
3354                         n = rdata->tailsz = len;
3355                         len = 0;
3356                 } else if (page->index > eof_index) {
3357                         /*
3358                          * The VFS will not try to do readahead past the
3359                          * i_size, but it's possible that we have outstanding
3360                          * writes with gaps in the middle and the i_size hasn't
3361                          * caught up yet. Populate those with zeroed out pages
3362                          * to prevent the VFS from repeatedly attempting to
3363                          * fill them until the writes are flushed.
3364                          */
3365                         zero_user(page, 0, PAGE_SIZE);
3366                         lru_cache_add_file(page);
3367                         flush_dcache_page(page);
3368                         SetPageUptodate(page);
3369                         unlock_page(page);
3370                         put_page(page);
3371                         rdata->pages[i] = NULL;
3372                         rdata->nr_pages--;
3373                         continue;
3374                 } else {
3375                         /* no need to hold page hostage */
3376                         lru_cache_add_file(page);
3377                         unlock_page(page);
3378                         put_page(page);
3379                         rdata->pages[i] = NULL;
3380                         rdata->nr_pages--;
3381                         continue;
3382                 }
3383
3384                 result = cifs_read_page_from_socket(server, page, n);
3385                 if (result < 0)
3386                         break;
3387
3388                 rdata->got_bytes += result;
3389         }
3390
3391         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3392                                                 rdata->got_bytes : result;
3393 }
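
/*
 * Example of the eof handling above (editorial note): with server_eof ==
 * 10000, eof_index is (10000 - 1) >> PAGE_SHIFT == 2 for 4 KiB pages. If
 * the response data runs out (len reaches 0), pages with index > 2 are
 * zero-filled and marked up to date since they lie beyond the server's
 * eof, while in-range pages without data are simply released so the VFS
 * can retry them later.
 */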
3394
3395 static int
3396 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3397                     unsigned int rsize, struct list_head *tmplist,
3398                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3399 {
3400         struct page *page, *tpage;
3401         unsigned int expected_index;
3402         int rc;
3403         gfp_t gfp = readahead_gfp_mask(mapping);
3404
3405         INIT_LIST_HEAD(tmplist);
3406
3407         page = list_entry(page_list->prev, struct page, lru);
3408
3409         /*
3410          * Lock the page and put it in the cache. Since no one else
3411          * should have access to this page, we're safe to simply set
3412          * PG_locked without checking it first.
3413          */
3414         __SetPageLocked(page);
3415         rc = add_to_page_cache_locked(page, mapping,
3416                                       page->index, gfp);
3417
3418         /* give up if we can't stick it in the cache */
3419         if (rc) {
3420                 __ClearPageLocked(page);
3421                 return rc;
3422         }
3423
3424         /* move first page to the tmplist */
3425         *offset = (loff_t)page->index << PAGE_SHIFT;
3426         *bytes = PAGE_SIZE;
3427         *nr_pages = 1;
3428         list_move_tail(&page->lru, tmplist);
3429
3430         /* now try and add more pages onto the request */
3431         expected_index = page->index + 1;
3432         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3433                 /* discontinuity? */
3434                 if (page->index != expected_index)
3435                         break;
3436
3437                 /* would this page push the read over the rsize? */
3438                 if (*bytes + PAGE_SIZE > rsize)
3439                         break;
3440
3441                 __SetPageLocked(page);
3442                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3443                         __ClearPageLocked(page);
3444                         break;
3445                 }
3446                 list_move_tail(&page->lru, tmplist);
3447                 (*bytes) += PAGE_SIZE;
3448                 expected_index++;
3449                 (*nr_pages)++;
3450         }
3451         return rc;
3452 }
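
/*
 * Worked example for the batching above (editorial note): if page_list
 * holds indexes 9, 5, 4 and 3 (declining order) and rsize == 16384, one
 * call moves pages 3, 4 and 5 to tmplist (*offset == 12288, *bytes ==
 * 12288, *nr_pages == 3) and stops at the discontinuity; page 9 stays on
 * page_list for the caller's next iteration.
 */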
3453
3454 static int cifs_readpages(struct file *file, struct address_space *mapping,
3455         struct list_head *page_list, unsigned num_pages)
3456 {
3457         int rc;
3458         struct list_head tmplist;
3459         struct cifsFileInfo *open_file = file->private_data;
3460         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3461         struct TCP_Server_Info *server;
3462         pid_t pid;
3463
3464         /*
3465          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3466          * immediately if the cookie is negative
3467          *
3468          * After this point, every page in the list might have PG_fscache set,
3469          * so we will need to clean that up off of every page we don't use.
3470          */
3471         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3472                                          &num_pages);
3473         if (rc == 0)
3474                 return rc;
3475
3476         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3477                 pid = open_file->pid;
3478         else
3479                 pid = current->tgid;
3480
3481         rc = 0;
3482         server = tlink_tcon(open_file->tlink)->ses->server;
3483
3484         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3485                  __func__, file, mapping, num_pages);
3486
3487         /*
3488          * Start with the page at end of list and move it to private
3489          * list. Do the same with any following pages until we hit
3490          * the rsize limit, hit an index discontinuity, or run out of
3491          * pages. Issue the async read and then start the loop again
3492          * until the list is empty.
3493          *
3494          * Note that list order is important. The page_list is in
3495          * the order of declining indexes, but when we put the pages
3496          * into rdata->pages we want them in increasing order.
3497          */
3498         while (!list_empty(page_list)) {
3499                 unsigned int i, nr_pages, bytes, rsize;
3500                 loff_t offset;
3501                 struct page *page, *tpage;
3502                 struct cifs_readdata *rdata;
3503                 unsigned credits;
3504
3505                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3506                                                    &rsize, &credits);
3507                 if (rc)
3508                         break;
3509
3510                 /*
3511                  * Give up immediately if rsize is too small to read an entire
3512                  * page. The VFS will fall back to readpage. We should never
3513                  * reach this point however since we set ra_pages to 0 when the
3514                  * rsize is smaller than a cache page.
3515                  */
3516                 if (unlikely(rsize < PAGE_SIZE)) {
3517                         add_credits_and_wake_if(server, credits, 0);
3518                         return 0;
3519                 }
3520
3521                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3522                                          &nr_pages, &offset, &bytes);
3523                 if (rc) {
3524                         add_credits_and_wake_if(server, credits, 0);
3525                         break;
3526                 }
3527
3528                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3529                 if (!rdata) {
3530                         /* best to give up if we're out of mem */
3531                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3532                                 list_del(&page->lru);
3533                                 lru_cache_add_file(page);
3534                                 unlock_page(page);
3535                                 put_page(page);
3536                         }
3537                         rc = -ENOMEM;
3538                         add_credits_and_wake_if(server, credits, 0);
3539                         break;
3540                 }
3541
3542                 rdata->cfile = cifsFileInfo_get(open_file);
3543                 rdata->mapping = mapping;
3544                 rdata->offset = offset;
3545                 rdata->bytes = bytes;
3546                 rdata->pid = pid;
3547                 rdata->pagesz = PAGE_SIZE;
3548                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3549                 rdata->credits = credits;
3550
3551                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3552                         list_del(&page->lru);
3553                         rdata->pages[rdata->nr_pages++] = page;
3554                 }
3555
3556                 if (!rdata->cfile->invalidHandle ||
3557                     !cifs_reopen_file(rdata->cfile, true))
3558                         rc = server->ops->async_readv(rdata);
3559                 if (rc) {
3560                         add_credits_and_wake_if(server, rdata->credits, 0);
3561                         for (i = 0; i < rdata->nr_pages; i++) {
3562                                 page = rdata->pages[i];
3563                                 lru_cache_add_file(page);
3564                                 unlock_page(page);
3565                                 put_page(page);
3566                         }
3567                         /* Fallback to the readpage in error/reconnect cases */
3568                         kref_put(&rdata->refcount, cifs_readdata_release);
3569                         break;
3570                 }
3571
3572                 kref_put(&rdata->refcount, cifs_readdata_release);
3573         }
3574
3575         /* Any pages that have been shown to fscache but didn't get added to
3576          * the pagecache must be uncached before they get returned to the
3577          * allocator.
3578          */
3579         cifs_fscache_readpages_cancel(mapping->host, page_list);
3580         return rc;
3581 }
3582
3583 /*
3584  * cifs_readpage_worker must be called with the page pinned
3585  */
3586 static int cifs_readpage_worker(struct file *file, struct page *page,
3587         loff_t *poffset)
3588 {
3589         char *read_data;
3590         int rc;
3591
3592         /* Is the page cached? */
3593         rc = cifs_readpage_from_fscache(file_inode(file), page);
3594         if (rc == 0)
3595                 goto read_complete;
3596
3597         read_data = kmap(page);
3598         /* for reads over a certain size we could initiate async read-ahead */
3599
3600         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3601
3602         if (rc < 0)
3603                 goto io_error;
3604         else
3605                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3606
3607         file_inode(file)->i_atime =
3608                 current_time(file_inode(file));
3609
3610         if (PAGE_SIZE > rc)
3611                 memset(read_data + rc, 0, PAGE_SIZE - rc);
3612
3613         flush_dcache_page(page);
3614         SetPageUptodate(page);
3615
3616         /* send this page to the cache */
3617         cifs_readpage_to_fscache(file_inode(file), page);
3618
3619         rc = 0;
3620
3621 io_error:
3622         kunmap(page);
3623         unlock_page(page);
3624
3625 read_complete:
3626         return rc;
3627 }
3628
3629 static int cifs_readpage(struct file *file, struct page *page)
3630 {
3631         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3632         int rc = -EACCES;
3633         unsigned int xid;
3634
3635         xid = get_xid();
3636
3637         if (file->private_data == NULL) {
3638                 rc = -EBADF;
3639                 free_xid(xid);
3640                 return rc;
3641         }
3642
3643         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3644                  page, (int)offset, (int)offset);
3645
3646         rc = cifs_readpage_worker(file, page, &offset);
3647
3648         free_xid(xid);
3649         return rc;
3650 }
3651
3652 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3653 {
3654         struct cifsFileInfo *open_file;
3655         struct cifs_tcon *tcon =
3656                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3657
3658         spin_lock(&tcon->open_file_lock);
3659         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3660                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3661                         spin_unlock(&tcon->open_file_lock);
3662                         return 1;
3663                 }
3664         }
3665         spin_unlock(&tcon->open_file_lock);
3666         return 0;
3667 }
3668
3669 /* We do not want to update the file size from the server for inodes
3670    open for write - to avoid races with writepage extending the file.
3671    In the future we could consider allowing a refresh of the inode
3672    only on increases in the file size, but this is tricky to do
3673    without racing with writebehind page caching in the current
3674    Linux kernel design. */
3675 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3676 {
3677         if (!cifsInode)
3678                 return true;
3679
3680         if (is_inode_writable(cifsInode)) {
3681                 /* This inode is open for write at least once */
3682                 struct cifs_sb_info *cifs_sb;
3683
3684                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3685                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3686                         /* with direct I/O there is no page cache to
3687                            corrupt, so we can change the size safely */
3688                         return true;
3689                 }
3690
3691                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3692                         return true;
3693
3694                 return false;
3695         } else
3696                 return true;
3697 }
3698
3699 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3700                         loff_t pos, unsigned len, unsigned flags,
3701                         struct page **pagep, void **fsdata)
3702 {
3703         int oncethru = 0;
3704         pgoff_t index = pos >> PAGE_SHIFT;
3705         loff_t offset = pos & (PAGE_SIZE - 1);
3706         loff_t page_start = pos & PAGE_MASK;
3707         loff_t i_size;
3708         struct page *page;
3709         int rc = 0;
3710
3711         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3712
3713 start:
3714         page = grab_cache_page_write_begin(mapping, index, flags);
3715         if (!page) {
3716                 rc = -ENOMEM;
3717                 goto out;
3718         }
3719
3720         if (PageUptodate(page))
3721                 goto out;
3722
3723         /*
3724          * If we write a full page it will be up to date, no need to read from
3725          * the server. If the write is short, we'll end up doing a sync write
3726          * instead.
3727          */
3728         if (len == PAGE_SIZE)
3729                 goto out;
3730
3731         /*
3732          * optimize away the read when we have an oplock, and we're not
3733          * expecting to use any of the data we'd be reading in. That
3734          * is, when the page lies beyond the EOF, or straddles the EOF
3735          * and the write will cover all of the existing data.
3736          */
3737         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3738                 i_size = i_size_read(mapping->host);
3739                 if (page_start >= i_size ||
3740                     (offset == 0 && (pos + len) >= i_size)) {
3741                         zero_user_segments(page, 0, offset,
3742                                            offset + len,
3743                                            PAGE_SIZE);
3744                         /*
3745                          * PageChecked means that the parts of the page
3746                          * to which we're not writing are considered up
3747                          * to date. Once the data is copied to the
3748                          * page, it can be set uptodate.
3749                          */
3750                         SetPageChecked(page);
3751                         goto out;
3752                 }
3753         }
3754
3755         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3756                 /*
3757                  * might as well read a page, it is fast enough. If we get
3758                  * an error, we don't need to return it. cifs_write_end will
3759                  * do a sync write instead since PG_uptodate isn't set.
3760                  */
3761                 cifs_readpage_worker(file, page, &page_start);
3762                 put_page(page);
3763                 oncethru = 1;
3764                 goto start;
3765         } else {
3766                 /* we could try using another file handle if there is one -
3767                    but how would we lock it to prevent a close of that handle
3768                    racing with this read? In any case this data will be
3769                    written out by write_end, so this is fine */
3770         }
3771 out:
3772         *pagep = page;
3773         return rc;
3774 }
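
/*
 * Example of the oplocked fast path in cifs_write_begin() (editorial
 * note): with i_size == 6000 and a write of len == 100 at pos == 8192,
 * page_start (8192) >= i_size, so the regions of the page around the
 * write range are zeroed and the page is marked PageChecked instead of
 * being read from the server first.
 */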
3775
3776 static int cifs_release_page(struct page *page, gfp_t gfp)
3777 {
3778         if (PagePrivate(page))
3779                 return 0;
3780
3781         return cifs_fscache_release_page(page, gfp);
3782 }
3783
3784 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3785                                  unsigned int length)
3786 {
3787         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3788
3789         if (offset == 0 && length == PAGE_SIZE)
3790                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3791 }
3792
3793 static int cifs_launder_page(struct page *page)
3794 {
3795         int rc = 0;
3796         loff_t range_start = page_offset(page);
3797         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3798         struct writeback_control wbc = {
3799                 .sync_mode = WB_SYNC_ALL,
3800                 .nr_to_write = 0,
3801                 .range_start = range_start,
3802                 .range_end = range_end,
3803         };
3804
3805         cifs_dbg(FYI, "Launder page: %p\n", page);
3806
3807         if (clear_page_dirty_for_io(page))
3808                 rc = cifs_writepage_locked(page, &wbc);
3809
3810         cifs_fscache_invalidate_page(page, page->mapping->host);
3811         return rc;
3812 }
3813
3814 void cifs_oplock_break(struct work_struct *work)
3815 {
3816         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3817                                                   oplock_break);
3818         struct inode *inode = d_inode(cfile->dentry);
3819         struct cifsInodeInfo *cinode = CIFS_I(inode);
3820         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3821         struct TCP_Server_Info *server = tcon->ses->server;
3822         int rc = 0;
3823
3824         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3825                         TASK_UNINTERRUPTIBLE);
3826
3827         server->ops->downgrade_oplock(server, cinode,
3828                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3829
3830         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3831                                                 cifs_has_mand_locks(cinode)) {
3832                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3833                          inode);
3834                 cinode->oplock = 0;
3835         }
3836
3837         if (inode && S_ISREG(inode->i_mode)) {
3838                 if (CIFS_CACHE_READ(cinode))
3839                         break_lease(inode, O_RDONLY);
3840                 else
3841                         break_lease(inode, O_WRONLY);
3842                 rc = filemap_fdatawrite(inode->i_mapping);
3843                 if (!CIFS_CACHE_READ(cinode)) {
3844                         rc = filemap_fdatawait(inode->i_mapping);
3845                         mapping_set_error(inode->i_mapping, rc);
3846                         cifs_zap_mapping(inode);
3847                 }
3848                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3849         }
3850
3851         rc = cifs_push_locks(cfile);
3852         if (rc)
3853                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3854
3855         /*
3856          * Releasing a stale oplock after a recent reconnect of the SMB
3857          * session, using a now-incorrect file handle, is not a data
3858          * integrity issue; but don't bother sending an oplock release if
3859          * the session is disconnected, as the server already released it.
3860          */
3861         if (!cfile->oplock_break_cancelled) {
3862                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3863                                                              cinode);
3864                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3865         }
3866         cifs_done_oplock_break(cinode);
3867 }
3868
3869 /*
3870  * The presence of cifs_direct_io() in the address space ops vector
3871  * allows open() O_DIRECT flags which would have failed otherwise.
3872  *
3873  * In the non-cached mode (mount with cache=none) we shunt off direct
3874  * read and write requests, so this method should never be called.
3875  *
3876  * Direct IO is not yet supported in the cached mode.
3877  */
3878 static ssize_t
3879 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3880 {
3881         /*
3882          * FIXME
3883          * Eventually need to support direct IO for non forcedirectio mounts
3884          */
3885         return -EINVAL;
3886 }
3887
3888
3889 const struct address_space_operations cifs_addr_ops = {
3890         .readpage = cifs_readpage,
3891         .readpages = cifs_readpages,
3892         .writepage = cifs_writepage,
3893         .writepages = cifs_writepages,
3894         .write_begin = cifs_write_begin,
3895         .write_end = cifs_write_end,
3896         .set_page_dirty = __set_page_dirty_nobuffers,
3897         .releasepage = cifs_release_page,
3898         .direct_IO = cifs_direct_io,
3899         .invalidatepage = cifs_invalidate_page,
3900         .launder_page = cifs_launder_page,
3901 };
3902
3903 /*
3904  * cifs_readpages requires the server to support a buffer large enough to
3905  * contain the header plus one complete page of data.  Otherwise, we need
3906  * to leave cifs_readpages out of the address space operations.
3907  */
3908 const struct address_space_operations cifs_addr_ops_smallbuf = {
3909         .readpage = cifs_readpage,
3910         .writepage = cifs_writepage,
3911         .writepages = cifs_writepages,
3912         .write_begin = cifs_write_begin,
3913         .write_end = cifs_write_end,
3914         .set_page_dirty = __set_page_dirty_nobuffers,
3915         .releasepage = cifs_release_page,
3916         .invalidatepage = cifs_invalidate_page,
3917         .launder_page = cifs_launder_page,
3918 };
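
/*
 * Illustrative sketch (editorial addition, #if 0 so it is never built) of
 * how a caller might pick between the two aops tables. The
 * "can_read_full_page" predicate is an assumption for illustration, not
 * the exact upstream test; the real selection happens where the inode is
 * initialized elsewhere in cifs.
 */
#if 0
	inode->i_data.a_ops = can_read_full_page(server) ?
				&cifs_addr_ops : &cifs_addr_ops_smallbuf;
#endif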