fs/cifs: reopen persistent handles on reconnect
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
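
/*
 * For example, an open(2) call with O_CREAT | O_TRUNC maps to
 * FILE_OVERWRITE_IF above: create the file if it does not exist,
 * truncate it if it does.
 */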

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is not a direct match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}
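
/*
 * Check whether any byte-range locks are currently recorded for any open
 * instance of this inode.  Used below when a read oplock is granted:
 * cached reads are not safe while mandatory byte-range locks are held,
 * so the oplock is downgraded to None in that case.
 */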
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);

        /* if readable, the file instance is put first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}
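
/*
 * Lock ordering note for the function above: tcon->open_file_lock
 * protects both the tcon's openFileList and the inode's openFileList,
 * while cinode->lock_sem guards the per-inode list of byte-range lock
 * structures.  fid->purge_cache is only acted on after open_file_lock
 * is dropped, presumably because cifs_zap_mapping() may sleep.
 */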

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}
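
/*
 * Reference counting sketch: cifsFileInfo_get() takes a reference under
 * file_info_lock, and cifsFileInfo_put() above tears everything down
 * once the count drops to zero.  Note that the SMB close is only sent
 * when the handle is still valid and the tcon does not need a
 * reconnect; otherwise the server-side handle is already gone.
 */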

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}
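
/*
 * Note the ordering in cifs_open() above: the pending open is
 * registered before the handle is opened on the server so that a lease
 * break arriving in that window is not missed, and it is deleted again
 * on every failure path before returning.
 */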

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}
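
/*
 * cifs_relock_file() mirrors cifs_push_locks() below: POSIX byte-range
 * locks are re-sent when the server supports the UNIX extensions FCNTL
 * capability (and POSIX brlocks are not disabled by mount option);
 * otherwise the mandatory-style locks are pushed.
 */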

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here, and we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write behind data that
         * needs to be flushed and the server version of the file size can be
         * stale. If we knew for sure that the inode was not dirty locally we
         * could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}
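
/*
 * Note the -ENOENT case above: if the ->open handler cleared
 * oparms.reconnect because the durable handle could not be reclaimed
 * (its timeout expired), the file is opened again from scratch and
 * reconnect is set again so that cifs_relock_file() re-acquires the
 * byte-range locks.
 */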

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

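/*
 * Walk every file open on this tree connection and reopen those whose
 * handles were invalidated by a reconnect.  cifs_reopen_file() returns
 * immediately for handles that are still valid, and open_file_lock is
 * dropped around each reopen call since reopening can sleep.
 */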
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* list all files open on tree connection, reopen persistent handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                spin_unlock(&tcon->open_file_lock);
                cifs_reopen_file(open_file, false /* do not flush */);
                spin_lock(&tcon->open_file_lock);
        }
        spin_unlock(&tcon->open_file_lock);
}
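
/*
 * Note: the list is walked with list_for_each_safe(), but
 * open_file_lock is dropped across each reopen, so the saved next
 * pointer is only safe as long as no entry is removed from
 * openFileList while a reopen is in flight.
 */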

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (no op), CIFS_READ_OP or CIFS_WRITE_OP */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}
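
/*
 * Neither conflict helper above takes cinode->lock_sem itself; callers
 * such as cifs_lock_test() and cifs_lock_add_if() below hold it for
 * reading or writing around the search.
 */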

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
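
/*
 * The wait_event_interruptible() condition above is an open-coded
 * list_empty(&lock->blist): a blocked waiter is unlinked from the
 * conflicting lock's blist and woken by cifs_del_lock_waiters(), after
 * which the lock attempt is retried.
 */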

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
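
/*
 * The two passes over types[] above exist because cifs_lockv() sends a
 * single lock type per request, so shared and exclusive ranges must be
 * batched separately, each batch holding at most max_num ranges.
 */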

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated entry */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}
1404
1405 int
1406 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1407                   unsigned int xid)
1408 {
1409         int rc = 0, stored_rc;
1410         int types[] = {LOCKING_ANDX_LARGE_FILES,
1411                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1412         unsigned int i;
1413         unsigned int max_num, num, max_buf;
1414         LOCKING_ANDX_RANGE *buf, *cur;
1415         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1416         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1417         struct cifsLockInfo *li, *tmp;
1418         __u64 length = 1 + flock->fl_end - flock->fl_start;
1419         struct list_head tmp_llist;
1420
1421         INIT_LIST_HEAD(&tmp_llist);
1422
1423         /*
1424          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1425          * and check it for zero before using.
1426          */
1427         max_buf = tcon->ses->server->maxBuf;
1428         if (!max_buf)
1429                 return -EINVAL;
1430
1431         max_num = (max_buf - sizeof(struct smb_hdr)) /
1432                                                 sizeof(LOCKING_ANDX_RANGE);
1433         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1434         if (!buf)
1435                 return -ENOMEM;
1436
1437         down_write(&cinode->lock_sem);
1438         for (i = 0; i < 2; i++) {
1439                 cur = buf;
1440                 num = 0;
1441                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1442                         if (flock->fl_start > li->offset ||
1443                             (flock->fl_start + length) <
1444                             (li->offset + li->length))
1445                                 continue;
1446                         if (current->tgid != li->pid)
1447                                 continue;
1448                         if (types[i] != li->type)
1449                                 continue;
1450                         if (cinode->can_cache_brlcks) {
1451                                 /*
1452                                  * We can cache brlock requests - simply remove
1453                                  * a lock from the file's list.
1454                                  */
1455                                 list_del(&li->llist);
1456                                 cifs_del_lock_waiters(li);
1457                                 kfree(li);
1458                                 continue;
1459                         }
1460                         cur->Pid = cpu_to_le16(li->pid);
1461                         cur->LengthLow = cpu_to_le32((u32)li->length);
1462                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1463                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1464                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1465                         /*
1466                          * We need to save a lock here to let us add it again to
1467                          * the file's list if the unlock range request fails on
1468                          * the server.
1469                          */
1470                         list_move(&li->llist, &tmp_llist);
1471                         if (++num == max_num) {
1472                                 stored_rc = cifs_lockv(xid, tcon,
1473                                                        cfile->fid.netfid,
1474                                                        li->type, num, 0, buf);
1475                                 if (stored_rc) {
1476                                         /*
1477                                          * We failed on the unlock range
1478                                          * request - add all locks from the tmp
1479                                          * list to the head of the file's list.
1480                                          */
1481                                         cifs_move_llist(&tmp_llist,
1482                                                         &cfile->llist->locks);
1483                                         rc = stored_rc;
1484                                 } else
1485                                         /*
1486                                          * The unlock range request succeed -
1487                                          * The unlock range request succeeded -
1488                                          */
1489                                         cifs_free_llist(&tmp_llist);
1490                                 cur = buf;
1491                                 num = 0;
1492                         } else
1493                                 cur++;
1494                 }
1495                 if (num) {
1496                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1497                                                types[i], num, 0, buf);
1498                         if (stored_rc) {
1499                                 cifs_move_llist(&tmp_llist,
1500                                                 &cfile->llist->locks);
1501                                 rc = stored_rc;
1502                         } else
1503                                 cifs_free_llist(&tmp_llist);
1504                 }
1505         }
1506
1507         up_write(&cinode->lock_sem);
1508         kfree(buf);
1509         return rc;
1510 }
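
/*
 * Illustrative sketch (not part of the driver): how a bounded request
 * buffer caps the number of ranges per wire call, the same arithmetic
 * cifs_unlock_range() applies to maxBuf above.  The buffer and header
 * sizes below are invented for the demonstration.
 */
#if 0 /* example only, never compiled */
#include <stdio.h>

struct fake_range { unsigned short pid; unsigned int lo, hi; };

int main(void)
{
        const unsigned int max_buf = 4356;      /* pretend server maxBuf */
        const unsigned int hdr_size = 32;       /* pretend SMB header size */
        unsigned int max_num = (max_buf - hdr_size) / sizeof(struct fake_range);
        unsigned int total = 1000, sent = 0;

        /* flush a full buffer every time the batch reaches max_num */
        while (sent < total) {
                unsigned int num = total - sent;

                if (num > max_num)
                        num = max_num;  /* buffer is full: cut the batch */
                printf("send batch of %u ranges\n", num);
                sent += num;
        }
        return 0;
}
#endif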
1511
1512 static int
1513 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1514            bool wait_flag, bool posix_lck, int lock, int unlock,
1515            unsigned int xid)
1516 {
1517         int rc = 0;
1518         __u64 length = 1 + flock->fl_end - flock->fl_start;
1519         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1520         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1521         struct TCP_Server_Info *server = tcon->ses->server;
1522         struct inode *inode = d_inode(cfile->dentry);
1523
1524         if (posix_lck) {
1525                 int posix_lock_type;
1526
1527                 rc = cifs_posix_lock_set(file, flock);
1528                 if (rc <= 0)
1529                         return rc;
1530
1531                 if (type & server->vals->shared_lock_type)
1532                         posix_lock_type = CIFS_RDLCK;
1533                 else
1534                         posix_lock_type = CIFS_WRLCK;
1535
1536                 if (unlock == 1)
1537                         posix_lock_type = CIFS_UNLCK;
1538
1539                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1540                                       hash_lockowner(flock->fl_owner),
1541                                       flock->fl_start, length,
1542                                       NULL, posix_lock_type, wait_flag);
1543                 goto out;
1544         }
1545
1546         if (lock) {
1547                 struct cifsLockInfo *lock;
1548
1549                 lock = cifs_lock_init(flock->fl_start, length, type);
1550                 if (!lock)
1551                         return -ENOMEM;
1552
1553                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1554                 if (rc < 0) {
1555                         kfree(lock);
1556                         return rc;
1557                 }
1558                 if (!rc)
1559                         goto out;
1560
1561                 /*
1562                  * A Windows 7 server can delay breaking a lease from read to
1563                  * None if we set a byte-range lock on a file - break it
1564                  * explicitly before sending the lock to the server to be sure
1565                  * the next read won't conflict with non-overlapping locks due
1566                  * to page reading.
1567                  */
1568                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1569                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1570                         cifs_zap_mapping(inode);
1571                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1572                                  inode);
1573                         CIFS_I(inode)->oplock = 0;
1574                 }
1575
1576                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1577                                             type, 1, 0, wait_flag);
1578                 if (rc) {
1579                         kfree(lock);
1580                         return rc;
1581                 }
1582
1583                 cifs_lock_add(cfile, lock);
1584         } else if (unlock)
1585                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1586
1587 out:
1588         if (flock->fl_flags & FL_POSIX && !rc)
1589                 rc = locks_lock_file_wait(file, flock);
1590         return rc;
1591 }
1592
1593 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1594 {
1595         int rc, xid;
1596         int lock = 0, unlock = 0;
1597         bool wait_flag = false;
1598         bool posix_lck = false;
1599         struct cifs_sb_info *cifs_sb;
1600         struct cifs_tcon *tcon;
1601         struct cifsInodeInfo *cinode;
1602         struct cifsFileInfo *cfile;
1603         __u16 netfid;
1604         __u32 type;
1605
1606         rc = -EACCES;
1607         xid = get_xid();
1608
1609         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1610                  cmd, flock->fl_flags, flock->fl_type,
1611                  flock->fl_start, flock->fl_end);
1612
1613         cfile = (struct cifsFileInfo *)file->private_data;
1614         tcon = tlink_tcon(cfile->tlink);
1615
1616         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1617                         tcon->ses->server);
1618
1619         cifs_sb = CIFS_FILE_SB(file);
1620         netfid = cfile->fid.netfid;
1621         cinode = CIFS_I(file_inode(file));
1622
1623         if (cap_unix(tcon->ses) &&
1624             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1625             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1626                 posix_lck = true;
1627         /*
1628          * BB add code here to normalize offset and length to account for
1629          * negative length which we can not accept over the wire.
1630          */
1631         if (IS_GETLK(cmd)) {
1632                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1633                 free_xid(xid);
1634                 return rc;
1635         }
1636
1637         if (!lock && !unlock) {
1638                 /*
1639                  * if this is neither a lock nor an unlock request, there is
1640                  * nothing to do since we do not know what was asked for
1641                  */
1642                 free_xid(xid);
1643                 return -EOPNOTSUPP;
1644         }
1645
1646         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1647                         xid);
1648         free_xid(xid);
1649         return rc;
1650 }
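
/*
 * Illustrative sketch (not part of the driver): a byte-range lock taken
 * with fcntl(2) on a file that lives on a CIFS mount is what arrives at
 * cifs_lock() above.  The mount path is hypothetical; the flock fields
 * are standard POSIX.
 */
#if 0 /* example only, never compiled */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct flock fl = {
                .l_type   = F_WRLCK,    /* exclusive - cf. exclusive_lock_type */
                .l_whence = SEEK_SET,
                .l_start  = 0,
                .l_len    = 4096,       /* fl_end becomes fl_start + len - 1 */
        };
        int fd = open("/mnt/cifs/testfile", O_RDWR); /* hypothetical path */

        if (fd < 0)
                return 1;
        if (fcntl(fd, F_SETLK, &fl) < 0)
                perror("F_SETLK");      /* fails on a conflicting lock */
        fl.l_type = F_UNLCK;
        fcntl(fd, F_SETLK, &fl);        /* exercises the unlock branch */
        close(fd);
        return 0;
}
#endif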
1651
1652 /*
1653  * update the file size (if needed) after a write. Should be called with
1654  * the inode->i_lock held
1655  */
1656 void
1657 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1658                       unsigned int bytes_written)
1659 {
1660         loff_t end_of_write = offset + bytes_written;
1661
1662         if (end_of_write > cifsi->server_eof)
1663                 cifsi->server_eof = end_of_write;
1664 }
1665
1666 static ssize_t
1667 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1668            size_t write_size, loff_t *offset)
1669 {
1670         int rc = 0;
1671         unsigned int bytes_written = 0;
1672         unsigned int total_written;
1673         struct cifs_sb_info *cifs_sb;
1674         struct cifs_tcon *tcon;
1675         struct TCP_Server_Info *server;
1676         unsigned int xid;
1677         struct dentry *dentry = open_file->dentry;
1678         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1679         struct cifs_io_parms io_parms;
1680
1681         cifs_sb = CIFS_SB(dentry->d_sb);
1682
1683         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1684                  write_size, *offset, dentry);
1685
1686         tcon = tlink_tcon(open_file->tlink);
1687         server = tcon->ses->server;
1688
1689         if (!server->ops->sync_write)
1690                 return -ENOSYS;
1691
1692         xid = get_xid();
1693
1694         for (total_written = 0; write_size > total_written;
1695              total_written += bytes_written) {
1696                 rc = -EAGAIN;
1697                 while (rc == -EAGAIN) {
1698                         struct kvec iov[2];
1699                         unsigned int len;
1700
1701                         if (open_file->invalidHandle) {
1702                                 /* we could deadlock if we called
1703                                    filemap_fdatawait from here so tell
1704                                    reopen_file not to flush data to the
1705                                    server now */
1706                                 rc = cifs_reopen_file(open_file, false);
1707                                 if (rc != 0)
1708                                         break;
1709                         }
1710
1711                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1712                                   (unsigned int)write_size - total_written);
1713                         /* iov[0] is reserved for smb header */
1714                         iov[1].iov_base = (char *)write_data + total_written;
1715                         iov[1].iov_len = len;
1716                         io_parms.pid = pid;
1717                         io_parms.tcon = tcon;
1718                         io_parms.offset = *offset;
1719                         io_parms.length = len;
1720                         rc = server->ops->sync_write(xid, &open_file->fid,
1721                                         &io_parms, &bytes_written, iov, 1);
1722                 }
1723                 if (rc || (bytes_written == 0)) {
1724                         if (total_written)
1725                                 break;
1726                         else {
1727                                 free_xid(xid);
1728                                 return rc;
1729                         }
1730                 } else {
1731                         spin_lock(&d_inode(dentry)->i_lock);
1732                         cifs_update_eof(cifsi, *offset, bytes_written);
1733                         spin_unlock(&d_inode(dentry)->i_lock);
1734                         *offset += bytes_written;
1735                 }
1736         }
1737
1738         cifs_stats_bytes_written(tcon, total_written);
1739
1740         if (total_written > 0) {
1741                 spin_lock(&d_inode(dentry)->i_lock);
1742                 if (*offset > d_inode(dentry)->i_size)
1743                         i_size_write(d_inode(dentry), *offset);
1744                 spin_unlock(&d_inode(dentry)->i_lock);
1745         }
1746         mark_inode_dirty_sync(d_inode(dentry));
1747         free_xid(xid);
1748         return total_written;
1749 }
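
/*
 * Illustrative sketch (not part of the driver): the accumulate-until-done
 * shape of cifs_write() above, reduced to a plain userspace write loop.
 * write_all() is a hypothetical helper, not a kernel or libc function.
 */
#if 0 /* example only, never compiled */
#include <errno.h>
#include <unistd.h>

static ssize_t write_all(int fd, const char *buf, size_t len)
{
        size_t total = 0;

        while (total < len) {
                ssize_t n = write(fd, buf + total, len - total);

                if (n < 0 && errno == EINTR)
                        continue;       /* retry, like the -EAGAIN loop */
                if (n <= 0)             /* hard error or no progress */
                        return total ? (ssize_t)total : n;
                total += n;             /* partial write: keep going */
        }
        return total;
}
#endif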
1750
1751 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1752                                         bool fsuid_only)
1753 {
1754         struct cifsFileInfo *open_file = NULL;
1755         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1756         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1757
1758         /* only filter by fsuid on multiuser mounts */
1759         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1760                 fsuid_only = false;
1761
1762         spin_lock(&tcon->open_file_lock);
1763         /* we could simply take the first list entry since write-only entries
1764            are always at the end of the list but since the first entry might
1765            have a close pending, we go through the whole list */
1766         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1767                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1768                         continue;
1769                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1770                         if (!open_file->invalidHandle) {
1771                                 /* found a good file */
1772                                 /* lock it so it will not be closed on us */
1773                                 cifsFileInfo_get(open_file);
1774                                 spin_unlock(&tcon->open_file_lock);
1775                                 return open_file;
1776                         } /* else might as well continue, and look for
1777                              another, or simply have the caller reopen it
1778                              again rather than trying to fix this handle */
1779                 } else /* write only file */
1780                         break; /* write only files are last so must be done */
1781         }
1782         spin_unlock(&tcon->open_file_lock);
1783         return NULL;
1784 }
1785
1786 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1787                                         bool fsuid_only)
1788 {
1789         struct cifsFileInfo *open_file, *inv_file = NULL;
1790         struct cifs_sb_info *cifs_sb;
1791         struct cifs_tcon *tcon;
1792         bool any_available = false;
1793         int rc;
1794         unsigned int refind = 0;
1795
1796         /* Having a null inode here (because mapping->host was set to zero by
1797            the VFS or MM) should not happen, but we had reports of an oops (due
1798            to it being zero) during stress test cases, so we need to check for it */
1799
1800         if (cifs_inode == NULL) {
1801                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1802                 dump_stack();
1803                 return NULL;
1804         }
1805
1806         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1807         tcon = cifs_sb_master_tcon(cifs_sb);
1808
1809         /* only filter by fsuid on multiuser mounts */
1810         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1811                 fsuid_only = false;
1812
1813         spin_lock(&tcon->open_file_lock);
1814 refind_writable:
1815         if (refind > MAX_REOPEN_ATT) {
1816                 spin_unlock(&tcon->open_file_lock);
1817                 return NULL;
1818         }
1819         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1820                 if (!any_available && open_file->pid != current->tgid)
1821                         continue;
1822                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1823                         continue;
1824                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1825                         if (!open_file->invalidHandle) {
1826                                 /* found a good writable file */
1827                                 cifsFileInfo_get(open_file);
1828                                 spin_unlock(&tcon->open_file_lock);
1829                                 return open_file;
1830                         } else {
1831                                 if (!inv_file)
1832                                         inv_file = open_file;
1833                         }
1834                 }
1835         }
1836         /* couldn't find a usable FH with the same pid, try any available */
1837         if (!any_available) {
1838                 any_available = true;
1839                 goto refind_writable;
1840         }
1841
1842         if (inv_file) {
1843                 any_available = false;
1844                 cifsFileInfo_get(inv_file);
1845         }
1846
1847         spin_unlock(&tcon->open_file_lock);
1848
1849         if (inv_file) {
1850                 rc = cifs_reopen_file(inv_file, false);
1851                 if (!rc)
1852                         return inv_file;
1853                 else {
1854                         spin_lock(&tcon->open_file_lock);
1855                         list_move_tail(&inv_file->flist,
1856                                         &cifs_inode->openFileList);
1857                         spin_unlock(&tcon->open_file_lock);
1858                         cifsFileInfo_put(inv_file);
1859                         ++refind;
1860                         inv_file = NULL;
1861                         spin_lock(&tcon->open_file_lock);
1862                         goto refind_writable;
1863                 }
1864         }
1865
1866         return NULL;
1867 }
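
/*
 * Illustrative sketch (not part of the driver): the scan-with-fallback
 * pattern of find_writable_file() above - prefer a valid writable handle,
 * but remember the first invalid one as a reopen candidate.  The struct
 * and helper below are invented for the demonstration.
 */
#if 0 /* example only, never compiled */
struct fake_handle { int writable; int valid; };

static int pick_handle(const struct fake_handle *h, int n)
{
        int fallback = -1;
        int i;

        for (i = 0; i < n; i++) {
                if (!h[i].writable)
                        continue;
                if (h[i].valid)
                        return i;       /* good writable handle: done */
                if (fallback < 0)
                        fallback = i;   /* candidate for a reopen attempt */
        }
        return fallback;                /* -1 if nothing usable at all */
}
#endif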
1868
1869 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1870 {
1871         struct address_space *mapping = page->mapping;
1872         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1873         char *write_data;
1874         int rc = -EFAULT;
1875         int bytes_written = 0;
1876         struct inode *inode;
1877         struct cifsFileInfo *open_file;
1878
1879         if (!mapping || !mapping->host)
1880                 return -EFAULT;
1881
1882         inode = page->mapping->host;
1883
1884         offset += (loff_t)from;
1885         write_data = kmap(page);
1886         write_data += from;
1887
1888         if ((to > PAGE_SIZE) || (from > to)) {
1889                 kunmap(page);
1890                 return -EIO;
1891         }
1892
1893         /* racing with truncate? */
1894         if (offset > mapping->host->i_size) {
1895                 kunmap(page);
1896                 return 0; /* don't care */
1897         }
1898
1899         /* check to make sure that we are not extending the file */
1900         if (mapping->host->i_size - offset < (loff_t)to)
1901                 to = (unsigned)(mapping->host->i_size - offset);
1902
1903         open_file = find_writable_file(CIFS_I(mapping->host), false);
1904         if (open_file) {
1905                 bytes_written = cifs_write(open_file, open_file->pid,
1906                                            write_data, to - from, &offset);
1907                 cifsFileInfo_put(open_file);
1908                 /* Does mm or vfs already set times? */
1909                 inode->i_atime = inode->i_mtime = current_time(inode);
1910                 if ((bytes_written > 0) && (offset))
1911                         rc = 0;
1912                 else if (bytes_written < 0)
1913                         rc = bytes_written;
1914         } else {
1915                 cifs_dbg(FYI, "No writable filehandles for inode\n");
1916                 rc = -EIO;
1917         }
1918
1919         kunmap(page);
1920         return rc;
1921 }
1922
1923 static struct cifs_writedata *
1924 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1925                           pgoff_t end, pgoff_t *index,
1926                           unsigned int *found_pages)
1927 {
1928         unsigned int nr_pages;
1929         struct page **pages;
1930         struct cifs_writedata *wdata;
1931
1932         wdata = cifs_writedata_alloc((unsigned int)tofind,
1933                                      cifs_writev_complete);
1934         if (!wdata)
1935                 return NULL;
1936
1937         /*
1938          * find_get_pages_tag seems to return a max of 256 on each
1939          * iteration, so we must call it several times in order to
1940          * fill the array or the wsize is effectively limited to
1941          * 256 * PAGE_SIZE.
1942          */
1943         *found_pages = 0;
1944         pages = wdata->pages;
1945         do {
1946                 nr_pages = find_get_pages_tag(mapping, index,
1947                                               PAGECACHE_TAG_DIRTY, tofind,
1948                                               pages);
1949                 *found_pages += nr_pages;
1950                 tofind -= nr_pages;
1951                 pages += nr_pages;
1952         } while (nr_pages && tofind && *index <= end);
1953
1954         return wdata;
1955 }
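
/*
 * Illustrative sketch (not part of the driver): the loop-until-filled
 * pattern used above because find_get_pages_tag() returns a bounded
 * batch per call.  fake_get_batch() stands in for that call; the
 * 256-entry cap is the per-call limit noted in the comment above.
 */
#if 0 /* example only, never compiled */
#include <stddef.h>

static size_t fake_get_batch(size_t want)
{
        const size_t cap = 256;         /* per-call batch limit */

        return want < cap ? want : cap;
}

static size_t gather(size_t tofind)
{
        size_t found = 0;

        while (tofind) {
                size_t got = fake_get_batch(tofind);

                if (!got)
                        break;          /* source exhausted */
                found += got;
                tofind -= got;
        }
        return found;
}
#endif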
1956
1957 static unsigned int
1958 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1959                     struct address_space *mapping,
1960                     struct writeback_control *wbc,
1961                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1962 {
1963         unsigned int nr_pages = 0, i;
1964         struct page *page;
1965
1966         for (i = 0; i < found_pages; i++) {
1967                 page = wdata->pages[i];
1968                 /*
1969                  * At this point we hold neither mapping->tree_lock nor
1970                  * lock on the page itself: the page may be truncated or
1971                  * invalidated (changing page->mapping to NULL), or even
1972                  * swizzled back from swapper_space to tmpfs file
1973                  * mapping
1974                  */
1975
1976                 if (nr_pages == 0)
1977                         lock_page(page);
1978                 else if (!trylock_page(page))
1979                         break;
1980
1981                 if (unlikely(page->mapping != mapping)) {
1982                         unlock_page(page);
1983                         break;
1984                 }
1985
1986                 if (!wbc->range_cyclic && page->index > end) {
1987                         *done = true;
1988                         unlock_page(page);
1989                         break;
1990                 }
1991
1992                 if (*next && (page->index != *next)) {
1993                         /* Not next consecutive page */
1994                         unlock_page(page);
1995                         break;
1996                 }
1997
1998                 if (wbc->sync_mode != WB_SYNC_NONE)
1999                         wait_on_page_writeback(page);
2000
2001                 if (PageWriteback(page) ||
2002                                 !clear_page_dirty_for_io(page)) {
2003                         unlock_page(page);
2004                         break;
2005                 }
2006
2007                 /*
2008                  * This actually clears the dirty bit in the radix tree.
2009                  * See cifs_writepage() for more commentary.
2010                  */
2011                 set_page_writeback(page);
2012                 if (page_offset(page) >= i_size_read(mapping->host)) {
2013                         *done = true;
2014                         unlock_page(page);
2015                         end_page_writeback(page);
2016                         break;
2017                 }
2018
2019                 wdata->pages[i] = page;
2020                 *next = page->index + 1;
2021                 ++nr_pages;
2022         }
2023
2024         /* reset index to refind any pages skipped */
2025         if (nr_pages == 0)
2026                 *index = wdata->pages[0]->index + 1;
2027
2028         /* put any pages we aren't going to use */
2029         for (i = nr_pages; i < found_pages; i++) {
2030                 put_page(wdata->pages[i]);
2031                 wdata->pages[i] = NULL;
2032         }
2033
2034         return nr_pages;
2035 }
2036
2037 static int
2038 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2039                  struct address_space *mapping, struct writeback_control *wbc)
2040 {
2041         int rc = 0;
2042         struct TCP_Server_Info *server;
2043         unsigned int i;
2044
2045         wdata->sync_mode = wbc->sync_mode;
2046         wdata->nr_pages = nr_pages;
2047         wdata->offset = page_offset(wdata->pages[0]);
2048         wdata->pagesz = PAGE_SIZE;
2049         wdata->tailsz = min(i_size_read(mapping->host) -
2050                         page_offset(wdata->pages[nr_pages - 1]),
2051                         (loff_t)PAGE_SIZE);
2052         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2053
2054         if (wdata->cfile != NULL)
2055                 cifsFileInfo_put(wdata->cfile);
2056         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2057         if (!wdata->cfile) {
2058                 cifs_dbg(VFS, "No writable handles for inode\n");
2059                 rc = -EBADF;
2060         } else {
2061                 wdata->pid = wdata->cfile->pid;
2062                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2063                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2064         }
2065
2066         for (i = 0; i < nr_pages; ++i)
2067                 unlock_page(wdata->pages[i]);
2068
2069         return rc;
2070 }
2071
2072 static int cifs_writepages(struct address_space *mapping,
2073                            struct writeback_control *wbc)
2074 {
2075         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2076         struct TCP_Server_Info *server;
2077         bool done = false, scanned = false, range_whole = false;
2078         pgoff_t end, index;
2079         struct cifs_writedata *wdata;
2080         int rc = 0;
2081
2082         /*
2083          * If wsize is smaller than the page cache size, default to writing
2084          * one page at a time via cifs_writepage
2085          */
2086         if (cifs_sb->wsize < PAGE_SIZE)
2087                 return generic_writepages(mapping, wbc);
2088
2089         if (wbc->range_cyclic) {
2090                 index = mapping->writeback_index; /* Start from prev offset */
2091                 end = -1;
2092         } else {
2093                 index = wbc->range_start >> PAGE_SHIFT;
2094                 end = wbc->range_end >> PAGE_SHIFT;
2095                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2096                         range_whole = true;
2097                 scanned = true;
2098         }
2099         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2100 retry:
2101         while (!done && index <= end) {
2102                 unsigned int i, nr_pages, found_pages, wsize, credits;
2103                 pgoff_t next = 0, tofind, saved_index = index;
2104
2105                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2106                                                    &wsize, &credits);
2107                 if (rc)
2108                         break;
2109
2110                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2111
2112                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2113                                                   &found_pages);
2114                 if (!wdata) {
2115                         rc = -ENOMEM;
2116                         add_credits_and_wake_if(server, credits, 0);
2117                         break;
2118                 }
2119
2120                 if (found_pages == 0) {
2121                         kref_put(&wdata->refcount, cifs_writedata_release);
2122                         add_credits_and_wake_if(server, credits, 0);
2123                         break;
2124                 }
2125
2126                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2127                                                end, &index, &next, &done);
2128
2129                 /* nothing to write? */
2130                 if (nr_pages == 0) {
2131                         kref_put(&wdata->refcount, cifs_writedata_release);
2132                         add_credits_and_wake_if(server, credits, 0);
2133                         continue;
2134                 }
2135
2136                 wdata->credits = credits;
2137
2138                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2139
2140                 /* send failure -- clean up the mess */
2141                 if (rc != 0) {
2142                         add_credits_and_wake_if(server, wdata->credits, 0);
2143                         for (i = 0; i < nr_pages; ++i) {
2144                                 if (rc == -EAGAIN)
2145                                         redirty_page_for_writepage(wbc,
2146                                                            wdata->pages[i]);
2147                                 else
2148                                         SetPageError(wdata->pages[i]);
2149                                 end_page_writeback(wdata->pages[i]);
2150                                 put_page(wdata->pages[i]);
2151                         }
2152                         if (rc != -EAGAIN)
2153                                 mapping_set_error(mapping, rc);
2154                 }
2155                 kref_put(&wdata->refcount, cifs_writedata_release);
2156
2157                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2158                         index = saved_index;
2159                         continue;
2160                 }
2161
2162                 wbc->nr_to_write -= nr_pages;
2163                 if (wbc->nr_to_write <= 0)
2164                         done = true;
2165
2166                 index = next;
2167         }
2168
2169         if (!scanned && !done) {
2170                 /*
2171                  * We hit the last page and there is more work to be done: wrap
2172                  * back to the start of the file
2173                  */
2174                 scanned = true;
2175                 index = 0;
2176                 goto retry;
2177         }
2178
2179         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2180                 mapping->writeback_index = index;
2181
2182         return rc;
2183 }
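
/*
 * Illustrative sketch (not part of the driver): mapping a byte range to
 * an inclusive page-index range, as cifs_writepages() above does with
 * wbc->range_start and wbc->range_end.  A 4K page size is assumed for
 * the numbers in the comments.
 */
#if 0 /* example only, never compiled */
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12           /* assume 4K pages */

int main(void)
{
        long long range_start = 5000, range_end = 20000;
        unsigned long index = range_start >> EXAMPLE_PAGE_SHIFT;  /* 1 */
        unsigned long end = range_end >> EXAMPLE_PAGE_SHIFT;      /* 4 */

        printf("write back pages %lu..%lu inclusive\n", index, end);
        return 0;
}
#endif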
2184
2185 static int
2186 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2187 {
2188         int rc;
2189         unsigned int xid;
2190
2191         xid = get_xid();
2192 /* BB add check for wbc flags */
2193         get_page(page);
2194         if (!PageUptodate(page))
2195                 cifs_dbg(FYI, "ppw - page not up to date\n");
2196
2197         /*
2198          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2199          *
2200          * A writepage() implementation always needs to do either this,
2201          * or re-dirty the page with "redirty_page_for_writepage()" in
2202          * the case of a failure.
2203          *
2204          * Just unlocking the page will cause the radix tree tag-bits
2205          * to fail to update with the state of the page correctly.
2206          */
2207         set_page_writeback(page);
2208 retry_write:
2209         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2210         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2211                 goto retry_write;
2212         else if (rc == -EAGAIN)
2213                 redirty_page_for_writepage(wbc, page);
2214         else if (rc != 0)
2215                 SetPageError(page);
2216         else
2217                 SetPageUptodate(page);
2218         end_page_writeback(page);
2219         put_page(page);
2220         free_xid(xid);
2221         return rc;
2222 }
2223
2224 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2225 {
2226         int rc = cifs_writepage_locked(page, wbc);
2227         unlock_page(page);
2228         return rc;
2229 }
2230
2231 static int cifs_write_end(struct file *file, struct address_space *mapping,
2232                         loff_t pos, unsigned len, unsigned copied,
2233                         struct page *page, void *fsdata)
2234 {
2235         int rc;
2236         struct inode *inode = mapping->host;
2237         struct cifsFileInfo *cfile = file->private_data;
2238         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2239         __u32 pid;
2240
2241         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2242                 pid = cfile->pid;
2243         else
2244                 pid = current->tgid;
2245
2246         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2247                  page, pos, copied);
2248
2249         if (PageChecked(page)) {
2250                 if (copied == len)
2251                         SetPageUptodate(page);
2252                 ClearPageChecked(page);
2253         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2254                 SetPageUptodate(page);
2255
2256         if (!PageUptodate(page)) {
2257                 char *page_data;
2258                 unsigned offset = pos & (PAGE_SIZE - 1);
2259                 unsigned int xid;
2260
2261                 xid = get_xid();
2262                 /* this is probably better than directly calling
2263                    partialpage_write since in this function we already know
2264                    the file handle, which we might as well leverage */
2265                 /* BB check if anything else is missing out of ppw,
2266                    such as updating the last write time */
2267                 page_data = kmap(page);
2268                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2269                 /* if (rc < 0) should we set writebehind rc? */
2270                 kunmap(page);
2271
2272                 free_xid(xid);
2273         } else {
2274                 rc = copied;
2275                 pos += copied;
2276                 set_page_dirty(page);
2277         }
2278
2279         if (rc > 0) {
2280                 spin_lock(&inode->i_lock);
2281                 if (pos > inode->i_size)
2282                         i_size_write(inode, pos);
2283                 spin_unlock(&inode->i_lock);
2284         }
2285
2286         unlock_page(page);
2287         put_page(page);
2288
2289         return rc;
2290 }
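
/*
 * Illustrative sketch (not part of the driver): the power-of-two mask
 * cifs_write_end() above uses to find the offset within a page.  A 4K
 * page size is assumed for the concrete numbers.
 */
#if 0 /* example only, never compiled */
#include <assert.h>

int main(void)
{
        const unsigned long page_size = 4096;    /* assume 4K pages */
        long long pos = 12345;
        unsigned offset = pos & (page_size - 1); /* 12345 % 4096 == 57 */

        assert(offset == 57);
        return 0;
}
#endif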
2291
2292 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2293                       int datasync)
2294 {
2295         unsigned int xid;
2296         int rc = 0;
2297         struct cifs_tcon *tcon;
2298         struct TCP_Server_Info *server;
2299         struct cifsFileInfo *smbfile = file->private_data;
2300         struct inode *inode = file_inode(file);
2301         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2302
2303         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2304         if (rc)
2305                 return rc;
2306         inode_lock(inode);
2307
2308         xid = get_xid();
2309
2310         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2311                  file, datasync);
2312
2313         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2314                 rc = cifs_zap_mapping(inode);
2315                 if (rc) {
2316                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2317                         rc = 0; /* don't care about it in fsync */
2318                 }
2319         }
2320
2321         tcon = tlink_tcon(smbfile->tlink);
2322         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2323                 server = tcon->ses->server;
2324                 if (server->ops->flush)
2325                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2326                 else
2327                         rc = -ENOSYS;
2328         }
2329
2330         free_xid(xid);
2331         inode_unlock(inode);
2332         return rc;
2333 }
2334
2335 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2336 {
2337         unsigned int xid;
2338         int rc = 0;
2339         struct cifs_tcon *tcon;
2340         struct TCP_Server_Info *server;
2341         struct cifsFileInfo *smbfile = file->private_data;
2342         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2343         struct inode *inode = file->f_mapping->host;
2344
2345         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2346         if (rc)
2347                 return rc;
2348         inode_lock(inode);
2349
2350         xid = get_xid();
2351
2352         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2353                  file, datasync);
2354
2355         tcon = tlink_tcon(smbfile->tlink);
2356         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2357                 server = tcon->ses->server;
2358                 if (server->ops->flush)
2359                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2360                 else
2361                         rc = -ENOSYS;
2362         }
2363
2364         free_xid(xid);
2365         inode_unlock(inode);
2366         return rc;
2367 }
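
/*
 * Illustrative sketch (not part of the driver): a plain fsync(2) or
 * fdatasync(2) on a CIFS mount is what lands in cifs_strict_fsync() or
 * cifs_fsync() above.  flush_file() is a hypothetical helper and the
 * path it would be called with is made up.
 */
#if 0 /* example only, never compiled */
#include <fcntl.h>
#include <unistd.h>

static int flush_file(const char *path)
{
        int rc, fd = open(path, O_WRONLY);

        if (fd < 0)
                return -1;
        rc = fdatasync(fd);             /* the datasync flavor of the call */
        close(fd);
        return rc;
}
#endif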
2368
2369 /*
2370  * As the file closes, flush all cached write data for this inode, checking
2371  * for write-behind errors.
2372  */
2373 int cifs_flush(struct file *file, fl_owner_t id)
2374 {
2375         struct inode *inode = file_inode(file);
2376         int rc = 0;
2377
2378         if (file->f_mode & FMODE_WRITE)
2379                 rc = filemap_write_and_wait(inode->i_mapping);
2380
2381         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2382
2383         return rc;
2384 }
2385
2386 static int
2387 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2388 {
2389         int rc = 0;
2390         unsigned long i;
2391
2392         for (i = 0; i < num_pages; i++) {
2393                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2394                 if (!pages[i]) {
2395                         /*
2396                          * save number of pages we have already allocated and
2397                          * return with ENOMEM error
2398                          */
2399                         num_pages = i;
2400                         rc = -ENOMEM;
2401                         break;
2402                 }
2403         }
2404
2405         if (rc) {
2406                 for (i = 0; i < num_pages; i++)
2407                         put_page(pages[i]);
2408         }
2409         return rc;
2410 }
2411
2412 static inline
2413 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2414 {
2415         size_t num_pages;
2416         size_t clen;
2417
2418         clen = min_t(const size_t, len, wsize);
2419         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2420
2421         if (cur_len)
2422                 *cur_len = clen;
2423
2424         return num_pages;
2425 }
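
/*
 * Illustrative sketch (not part of the driver): the bytes-to-pages
 * rounding done by get_numpages() above via DIV_ROUND_UP.  The macro is
 * re-derived locally; a 4K page size is assumed.
 */
#if 0 /* example only, never compiled */
#define EXAMPLE_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* 1 byte -> 1 page, 4096 bytes -> 1 page, 4097 bytes -> 2 pages */
_Static_assert(EXAMPLE_DIV_ROUND_UP(1, 4096) == 1, "one byte, one page");
_Static_assert(EXAMPLE_DIV_ROUND_UP(4097, 4096) == 2, "spills into page two");
#endif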
2426
2427 static void
2428 cifs_uncached_writedata_release(struct kref *refcount)
2429 {
2430         int i;
2431         struct cifs_writedata *wdata = container_of(refcount,
2432                                         struct cifs_writedata, refcount);
2433
2434         for (i = 0; i < wdata->nr_pages; i++)
2435                 put_page(wdata->pages[i]);
2436         cifs_writedata_release(refcount);
2437 }
2438
2439 static void
2440 cifs_uncached_writev_complete(struct work_struct *work)
2441 {
2442         struct cifs_writedata *wdata = container_of(work,
2443                                         struct cifs_writedata, work);
2444         struct inode *inode = d_inode(wdata->cfile->dentry);
2445         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2446
2447         spin_lock(&inode->i_lock);
2448         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2449         if (cifsi->server_eof > inode->i_size)
2450                 i_size_write(inode, cifsi->server_eof);
2451         spin_unlock(&inode->i_lock);
2452
2453         complete(&wdata->done);
2454
2455         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2456 }
2457
2458 static int
2459 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2460                       size_t *len, unsigned long *num_pages)
2461 {
2462         size_t save_len, copied, bytes, cur_len = *len;
2463         unsigned long i, nr_pages = *num_pages;
2464
2465         save_len = cur_len;
2466         for (i = 0; i < nr_pages; i++) {
2467                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2468                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2469                 cur_len -= copied;
2470                 /*
2471                  * If we didn't copy as much as we expected, then that
2472                  * may mean we trod into an unmapped area. Stop copying
2473                  * at that point. On the next pass through the big
2474                  * loop, we'll likely end up getting a zero-length
2475                  * write and bailing out of it.
2476                  */
2477                 if (copied < bytes)
2478                         break;
2479         }
2480         cur_len = save_len - cur_len;
2481         *len = cur_len;
2482
2483         /*
2484          * If we have no data to send, then that probably means that
2485          * the copy above failed altogether. That's most likely because
2486          * the address in the iovec was bogus. Return -EFAULT and let
2487          * the caller free anything we allocated and bail out.
2488          */
2489         if (!cur_len)
2490                 return -EFAULT;
2491
2492         /*
2493          * i + 1 now represents the number of pages we actually used in
2494          * the copy phase above.
2495          */
2496         *num_pages = i + 1;
2497         return 0;
2498 }
2499
2500 static int
2501 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2502                      struct cifsFileInfo *open_file,
2503                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2504 {
2505         int rc = 0;
2506         size_t cur_len;
2507         unsigned long nr_pages, num_pages, i;
2508         struct cifs_writedata *wdata;
2509         struct iov_iter saved_from = *from;
2510         loff_t saved_offset = offset;
2511         pid_t pid;
2512         struct TCP_Server_Info *server;
2513
2514         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2515                 pid = open_file->pid;
2516         else
2517                 pid = current->tgid;
2518
2519         server = tlink_tcon(open_file->tlink)->ses->server;
2520
2521         do {
2522                 unsigned int wsize, credits;
2523
2524                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2525                                                    &wsize, &credits);
2526                 if (rc)
2527                         break;
2528
2529                 nr_pages = get_numpages(wsize, len, &cur_len);
2530                 wdata = cifs_writedata_alloc(nr_pages,
2531                                              cifs_uncached_writev_complete);
2532                 if (!wdata) {
2533                         rc = -ENOMEM;
2534                         add_credits_and_wake_if(server, credits, 0);
2535                         break;
2536                 }
2537
2538                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2539                 if (rc) {
2540                         kfree(wdata);
2541                         add_credits_and_wake_if(server, credits, 0);
2542                         break;
2543                 }
2544
2545                 num_pages = nr_pages;
2546                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2547                 if (rc) {
2548                         for (i = 0; i < nr_pages; i++)
2549                                 put_page(wdata->pages[i]);
2550                         kfree(wdata);
2551                         add_credits_and_wake_if(server, credits, 0);
2552                         break;
2553                 }
2554
2555                 /*
2556                  * Bring nr_pages down to the number of pages we actually used,
2557                  * and free any pages that we didn't use.
2558                  */
2559                 for ( ; nr_pages > num_pages; nr_pages--)
2560                         put_page(wdata->pages[nr_pages - 1]);
2561
2562                 wdata->sync_mode = WB_SYNC_ALL;
2563                 wdata->nr_pages = nr_pages;
2564                 wdata->offset = (__u64)offset;
2565                 wdata->cfile = cifsFileInfo_get(open_file);
2566                 wdata->pid = pid;
2567                 wdata->bytes = cur_len;
2568                 wdata->pagesz = PAGE_SIZE;
2569                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2570                 wdata->credits = credits;
2571
2572                 if (!wdata->cfile->invalidHandle ||
2573                     !cifs_reopen_file(wdata->cfile, false))
2574                         rc = server->ops->async_writev(wdata,
2575                                         cifs_uncached_writedata_release);
2576                 if (rc) {
2577                         add_credits_and_wake_if(server, wdata->credits, 0);
2578                         kref_put(&wdata->refcount,
2579                                  cifs_uncached_writedata_release);
2580                         if (rc == -EAGAIN) {
2581                                 *from = saved_from;
2582                                 iov_iter_advance(from, offset - saved_offset);
2583                                 continue;
2584                         }
2585                         break;
2586                 }
2587
2588                 list_add_tail(&wdata->list, wdata_list);
2589                 offset += cur_len;
2590                 len -= cur_len;
2591         } while (len > 0);
2592
2593         return rc;
2594 }
2595
2596 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2597 {
2598         struct file *file = iocb->ki_filp;
2599         ssize_t total_written = 0;
2600         struct cifsFileInfo *open_file;
2601         struct cifs_tcon *tcon;
2602         struct cifs_sb_info *cifs_sb;
2603         struct cifs_writedata *wdata, *tmp;
2604         struct list_head wdata_list;
2605         struct iov_iter saved_from = *from;
2606         int rc;
2607
2608         /*
2609          * BB - optimize the case when signing is disabled. We can drop this
2610          * extra memory-to-memory copying and use iovec buffers to construct
2611          * the write request.
2612          */
2613
2614         rc = generic_write_checks(iocb, from);
2615         if (rc <= 0)
2616                 return rc;
2617
2618         INIT_LIST_HEAD(&wdata_list);
2619         cifs_sb = CIFS_FILE_SB(file);
2620         open_file = file->private_data;
2621         tcon = tlink_tcon(open_file->tlink);
2622
2623         if (!tcon->ses->server->ops->async_writev)
2624                 return -ENOSYS;
2625
2626         rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2627                                   open_file, cifs_sb, &wdata_list);
2628
2629         /*
2630          * If at least one write was successfully sent, then discard any rc
2631          * value from the later writes. If the other writes succeed, then
2632          * we'll end up returning whatever was written. If one fails, then
2633          * we'll get a new rc value from that.
2634          */
2635         if (!list_empty(&wdata_list))
2636                 rc = 0;
2637
2638         /*
2639          * Wait for and collect replies for any successful sends in order of
2640          * increasing offset. Once an error is hit or we get a fatal signal
2641          * while waiting, then return without waiting for any more replies.
2642          */
2643 restart_loop:
2644         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2645                 if (!rc) {
2646                         /* FIXME: freezable too? */
2647                         rc = wait_for_completion_killable(&wdata->done);
2648                         if (rc)
2649                                 rc = -EINTR;
2650                         else if (wdata->result)
2651                                 rc = wdata->result;
2652                         else
2653                                 total_written += wdata->bytes;
2654
2655                         /* resend call if it's a retryable error */
2656                         if (rc == -EAGAIN) {
2657                                 struct list_head tmp_list;
2658                                 struct iov_iter tmp_from = saved_from;
2659
2660                                 INIT_LIST_HEAD(&tmp_list);
2661                                 list_del_init(&wdata->list);
2662
2663                                 iov_iter_advance(&tmp_from,
2664                                                  wdata->offset - iocb->ki_pos);
2665
2666                                 rc = cifs_write_from_iter(wdata->offset,
2667                                                 wdata->bytes, &tmp_from,
2668                                                 open_file, cifs_sb, &tmp_list);
2669
2670                                 list_splice(&tmp_list, &wdata_list);
2671
2672                                 kref_put(&wdata->refcount,
2673                                          cifs_uncached_writedata_release);
2674                                 goto restart_loop;
2675                         }
2676                 }
2677                 list_del_init(&wdata->list);
2678                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2679         }
2680
2681         if (unlikely(!total_written))
2682                 return rc;
2683
2684         iocb->ki_pos += total_written;
2685         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2686         cifs_stats_bytes_written(tcon, total_written);
2687         return total_written;
2688 }
2689
2690 static ssize_t
2691 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2692 {
2693         struct file *file = iocb->ki_filp;
2694         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2695         struct inode *inode = file->f_mapping->host;
2696         struct cifsInodeInfo *cinode = CIFS_I(inode);
2697         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2698         ssize_t rc;
2699
2700         /*
2701          * We need to hold the sem to be sure nobody modifies lock list
2702          * with a brlock that prevents writing.
2703          */
2704         down_read(&cinode->lock_sem);
2705         inode_lock(inode);
2706
2707         rc = generic_write_checks(iocb, from);
2708         if (rc <= 0)
2709                 goto out;
2710
2711         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2712                                      server->vals->exclusive_lock_type, NULL,
2713                                      CIFS_WRITE_OP))
2714                 rc = __generic_file_write_iter(iocb, from);
2715         else
2716                 rc = -EACCES;
2717 out:
2718         inode_unlock(inode);
2719
2720         if (rc > 0)
2721                 rc = generic_write_sync(iocb, rc);
2722         up_read(&cinode->lock_sem);
2723         return rc;
2724 }
2725
2726 ssize_t
2727 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2728 {
2729         struct inode *inode = file_inode(iocb->ki_filp);
2730         struct cifsInodeInfo *cinode = CIFS_I(inode);
2731         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2732         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2733                                                 iocb->ki_filp->private_data;
2734         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2735         ssize_t written;
2736
2737         written = cifs_get_writer(cinode);
2738         if (written)
2739                 return written;
2740
2741         if (CIFS_CACHE_WRITE(cinode)) {
2742                 if (cap_unix(tcon->ses) &&
2743                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2744                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2745                         written = generic_file_write_iter(iocb, from);
2746                         goto out;
2747                 }
2748                 written = cifs_writev(iocb, from);
2749                 goto out;
2750         }
2751         /*
2752          * For non-oplocked files in strict cache mode we need to write the data
2753          * to the server exactly from pos to pos+len-1 rather than flush all
2754          * affected pages, because flushing may cause an error with mandatory
2755          * locks on these pages but not on the region from pos to pos+len-1.
2756          */
2757         written = cifs_user_writev(iocb, from);
2758         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2759                 /*
2760                  * A Windows 7 server can delay breaking a level2 oplock when a
2761                  * write request arrives - break it on the client to prevent
2762                  * reading stale data.
2763                  */
2764                 cifs_zap_mapping(inode);
2765                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2766                          inode);
2767                 cinode->oplock = 0;
2768         }
2769 out:
2770         cifs_put_writer(cinode);
2771         return written;
2772 }
2773
2774 static struct cifs_readdata *
2775 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2776 {
2777         struct cifs_readdata *rdata;
2778
2779         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2780                         GFP_KERNEL);
2781         if (rdata != NULL) {
2782                 kref_init(&rdata->refcount);
2783                 INIT_LIST_HEAD(&rdata->list);
2784                 init_completion(&rdata->done);
2785                 INIT_WORK(&rdata->work, complete);
2786         }
2787
2788         return rdata;
2789 }
2790
2791 void
2792 cifs_readdata_release(struct kref *refcount)
2793 {
2794         struct cifs_readdata *rdata = container_of(refcount,
2795                                         struct cifs_readdata, refcount);
2796
2797         if (rdata->cfile)
2798                 cifsFileInfo_put(rdata->cfile);
2799
2800         kfree(rdata);
2801 }
2802
2803 static int
2804 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2805 {
2806         int rc = 0;
2807         struct page *page;
2808         unsigned int i;
2809
2810         for (i = 0; i < nr_pages; i++) {
2811                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2812                 if (!page) {
2813                         rc = -ENOMEM;
2814                         break;
2815                 }
2816                 rdata->pages[i] = page;
2817         }
2818
2819         if (rc) {
2820                 for (i = 0; i < nr_pages; i++) {
2821                         put_page(rdata->pages[i]);
2822                         rdata->pages[i] = NULL;
2823                 }
2824         }
2825         return rc;
2826 }
2827
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
        struct cifs_readdata *rdata = container_of(refcount,
                                        struct cifs_readdata, refcount);
        unsigned int i;

        for (i = 0; i < rdata->nr_pages; i++) {
                put_page(rdata->pages[i]);
                rdata->pages[i] = NULL;
        }
        cifs_readdata_release(refcount);
}

/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iter:       destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
        size_t remaining = rdata->got_bytes;
        unsigned int i;

        for (i = 0; i < rdata->nr_pages; i++) {
                struct page *page = rdata->pages[i];
                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
                size_t written = copy_page_to_iter(page, 0, copy, iter);
                remaining -= written;
                if (written < copy && iov_iter_count(iter) > 0)
                        break;
        }
        return remaining ? -EFAULT : 0;
}

static void
cifs_uncached_readv_complete(struct work_struct *work)
{
        struct cifs_readdata *rdata = container_of(work,
                                                struct cifs_readdata, work);

        complete(&rdata->done);
        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}

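/*
 * Receive up to @len bytes from the server into the pages of an
 * uncached read request. Pages beyond the received length are released,
 * and a partial tail page is zero-filled past the valid data.
 */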
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
{
        int result = 0;
        unsigned int i;
        unsigned int nr_pages = rdata->nr_pages;

        rdata->got_bytes = 0;
        rdata->tailsz = PAGE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];
                size_t n;

                if (len <= 0) {
                        /* no need to hold page hostage */
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        put_page(page);
                        continue;
                }
                n = len;
                if (len >= PAGE_SIZE) {
                        /* enough data to fill the page */
                        n = PAGE_SIZE;
                        len -= n;
                } else {
                        zero_user(page, len, PAGE_SIZE - len);
                        rdata->tailsz = len;
                        len = 0;
                }
                result = cifs_read_page_from_socket(server, page, n);
                if (result < 0)
                        break;

                rdata->got_bytes += result;
        }

        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
                                                rdata->got_bytes : result;
}

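/*
 * Split an uncached read of @len bytes at @offset into rsize-sized
 * async read requests and queue the resulting readdata structures on
 * @rdata_list. Credits are obtained from the server for each request
 * before it is issued and handed back if sending fails.
 */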
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
{
        struct cifs_readdata *rdata;
        unsigned int npages, rsize, credits;
        size_t cur_len;
        int rc;
        pid_t pid;
        struct TCP_Server_Info *server;

        server = tlink_tcon(open_file->tlink)->ses->server;

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        do {
                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
                                                   &rsize, &credits);
                if (rc)
                        break;

                cur_len = min_t(const size_t, len, rsize);
                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);

                /* allocate a readdata struct */
                rdata = cifs_readdata_alloc(npages,
                                            cifs_uncached_readv_complete);
                if (!rdata) {
                        add_credits_and_wake_if(server, credits, 0);
                        rc = -ENOMEM;
                        break;
                }

                rc = cifs_read_allocate_pages(rdata, npages);
                if (rc)
                        goto error;

                rdata->cfile = cifsFileInfo_get(open_file);
                rdata->nr_pages = npages;
                rdata->offset = offset;
                rdata->bytes = cur_len;
                rdata->pid = pid;
                rdata->pagesz = PAGE_SIZE;
                rdata->read_into_pages = cifs_uncached_read_into_pages;
                rdata->credits = credits;

                if (!rdata->cfile->invalidHandle ||
                    !cifs_reopen_file(rdata->cfile, true))
                        rc = server->ops->async_readv(rdata);
error:
                if (rc) {
                        add_credits_and_wake_if(server, rdata->credits, 0);
                        kref_put(&rdata->refcount,
                                 cifs_uncached_readdata_release);
                        if (rc == -EAGAIN)
                                continue;
                        break;
                }

                list_add_tail(&rdata->list, rdata_list);
                offset += cur_len;
                len -= cur_len;
        } while (len > 0);

        return rc;
}

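/*
 * Read without going through the page cache: issue async reads for the
 * whole requested range, then walk the replies in order of increasing
 * offset, copying the data into the caller's iov_iter and resending any
 * request that came back with -EAGAIN after a reconnect.
 */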
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
        struct file *file = iocb->ki_filp;
        ssize_t rc;
        size_t len;
        ssize_t total_read = 0;
        loff_t offset = iocb->ki_pos;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *open_file;
        struct cifs_readdata *rdata, *tmp;
        struct list_head rdata_list;

        len = iov_iter_count(to);
        if (!len)
                return 0;

        INIT_LIST_HEAD(&rdata_list);
        cifs_sb = CIFS_FILE_SB(file);
        open_file = file->private_data;
        tcon = tlink_tcon(open_file->tlink);

        if (!tcon->ses->server->ops->async_readv)
                return -ENOSYS;

        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cifs_dbg(FYI, "attempting read on write only file instance\n");

        rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);

        /* if at least one read request was sent successfully, reset rc */
        if (!list_empty(&rdata_list))
                rc = 0;

        len = iov_iter_count(to);
        /* the loop below should proceed in the order of increasing offsets */
again:
        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
                if (!rc) {
                        /* FIXME: freezable sleep too? */
                        rc = wait_for_completion_killable(&rdata->done);
                        if (rc)
                                rc = -EINTR;
                        else if (rdata->result == -EAGAIN) {
                                /* resend call if it's a retryable error */
                                struct list_head tmp_list;
                                unsigned int got_bytes = rdata->got_bytes;

                                list_del_init(&rdata->list);
                                INIT_LIST_HEAD(&tmp_list);

                                /*
                                 * We got part of the data and then a
                                 * reconnect happened -- fill the buffer and
                                 * continue reading.
                                 */
                                if (got_bytes && got_bytes < rdata->bytes) {
                                        rc = cifs_readdata_to_iov(rdata, to);
                                        if (rc) {
                                                kref_put(&rdata->refcount,
                                                cifs_uncached_readdata_release);
                                                continue;
                                        }
                                }

                                rc = cifs_send_async_read(
                                                rdata->offset + got_bytes,
                                                rdata->bytes - got_bytes,
                                                rdata->cfile, cifs_sb,
                                                &tmp_list);

                                list_splice(&tmp_list, &rdata_list);

                                kref_put(&rdata->refcount,
                                         cifs_uncached_readdata_release);
                                goto again;
                        } else if (rdata->result)
                                rc = rdata->result;
                        else
                                rc = cifs_readdata_to_iov(rdata, to);

                        /* if there was a short read -- discard anything left */
                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
                                rc = -ENODATA;
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }

        total_read = len - iov_iter_count(to);

        cifs_stats_bytes_read(tcon, total_read);

        /* mask nodata case */
        if (rc == -ENODATA)
                rc = 0;

        if (total_read) {
                iocb->ki_pos += total_read;
                return total_read;
        }
        return rc;
}

ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
        struct inode *inode = file_inode(iocb->ki_filp);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
                                                iocb->ki_filp->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = -EACCES;

        /*
         * In strict cache mode we need to read from the server all the time
         * if we don't have a level II oplock because the server can delay
         * mtime changes - so we can't make a decision about inode
         * invalidation. Reading pages can also fail if there are mandatory
         * locks on pages affected by this read but not on the region from
         * pos to pos+len-1.
         */
        if (!CIFS_CACHE_READ(cinode))
                return cifs_user_readv(iocb, to);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return generic_file_read_iter(iocb, to);

        /*
         * We need to hold the sem to be sure nobody modifies lock list
         * with a brlock that prevents reading.
         */
        down_read(&cinode->lock_sem);
        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
                                     tcon->ses->server->vals->shared_lock_type,
                                     NULL, CIFS_READ_OP))
                rc = generic_file_read_iter(iocb, to);
        up_read(&cinode->lock_sem);
        return rc;
}

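/*
 * Synchronous read helper for the readpage path: read up to read_size
 * bytes at *offset into read_data, issuing rsize-sized requests and
 * reopening the file handle if it was invalidated by a reconnect.
 */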
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
        int rc = -EACCES;
        unsigned int bytes_read = 0;
        unsigned int total_read;
        unsigned int current_read_size;
        unsigned int rsize;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        char *cur_offset;
        struct cifsFileInfo *open_file;
        struct cifs_io_parms io_parms;
        int buf_type = CIFS_NO_BUFFER;
        __u32 pid;

        xid = get_xid();
        cifs_sb = CIFS_FILE_SB(file);

        /* FIXME: set up handlers for larger reads and/or convert to async */
        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

        if (file->private_data == NULL) {
                rc = -EBADF;
                free_xid(xid);
                return rc;
        }
        open_file = file->private_data;
        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_read) {
                free_xid(xid);
                return -ENOSYS;
        }

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cifs_dbg(FYI, "attempting read on write only file instance\n");

        for (total_read = 0, cur_offset = read_data; read_size > total_read;
             total_read += bytes_read, cur_offset += bytes_read) {
                do {
                        current_read_size = min_t(uint, read_size - total_read,
                                                  rsize);
                        /*
                         * For Windows ME and 9x do not request more than the
                         * server negotiated, since it will refuse the read
                         * otherwise.
                         */
                        if ((tcon->ses) && !(tcon->ses->capabilities &
                                tcon->ses->server->vals->cap_large_files)) {
                                current_read_size = min_t(uint,
                                        current_read_size, CIFSMaxBufSize);
                        }
                        if (open_file->invalidHandle) {
                                rc = cifs_reopen_file(open_file, true);
                                if (rc != 0)
                                        break;
                        }
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = current_read_size;
                        rc = server->ops->sync_read(xid, &open_file->fid,
                                                    &io_parms, &bytes_read,
                                                    &cur_offset, &buf_type);
                } while (rc == -EAGAIN);

                if (rc || (bytes_read == 0)) {
                        if (total_read) {
                                break;
                        } else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        cifs_stats_bytes_read(tcon, total_read);
                        *offset += bytes_read;
                }
        }
        free_xid(xid);
        return total_read;
}

/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *page = vmf->page;

        lock_page(page);
        return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct cifs_file_vm_ops = {
        .fault = filemap_fault,
        .map_pages = filemap_map_pages,
        .page_mkwrite = cifs_page_mkwrite,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
        int rc, xid;
        struct inode *inode = file_inode(file);

        xid = get_xid();

        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
                rc = cifs_zap_mapping(inode);
                if (rc) {
                        /* don't leak the xid on the early return */
                        free_xid(xid);
                        return rc;
                }
        }

        rc = generic_file_mmap(file, vma);
        if (rc == 0)
                vma->vm_ops = &cifs_file_vm_ops;
        free_xid(xid);
        return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        int rc, xid;

        xid = get_xid();
        rc = cifs_revalidate_file(file);
        if (rc) {
                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
                         rc);
                free_xid(xid);
                return rc;
        }
        rc = generic_file_mmap(file, vma);
        if (rc == 0)
                vma->vm_ops = &cifs_file_vm_ops;
        free_xid(xid);
        return rc;
}

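/*
 * Completion work for cached (readpages) reads: mark the pages that
 * were filled up to date, add them to the LRU, hand them to fscache,
 * and drop the references held by the readdata.
 */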
static void
cifs_readv_complete(struct work_struct *work)
{
        unsigned int i, got_bytes;
        struct cifs_readdata *rdata = container_of(work,
                                                struct cifs_readdata, work);

        got_bytes = rdata->got_bytes;
        for (i = 0; i < rdata->nr_pages; i++) {
                struct page *page = rdata->pages[i];

                lru_cache_add_file(page);

                if (rdata->result == 0 ||
                    (rdata->result == -EAGAIN && got_bytes)) {
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                }

                unlock_page(page);

                if (rdata->result == 0 ||
                    (rdata->result == -EAGAIN && got_bytes))
                        cifs_readpage_to_fscache(rdata->mapping->host, page);

                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

                put_page(page);
                rdata->pages[i] = NULL;
        }
        kref_put(&rdata->refcount, cifs_readdata_release);
}

static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
{
        int result = 0;
        unsigned int i;
        u64 eof;
        pgoff_t eof_index;
        unsigned int nr_pages = rdata->nr_pages;

        /* determine the eof that the server (probably) has */
        eof = CIFS_I(rdata->mapping->host)->server_eof;
        eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

        rdata->got_bytes = 0;
        rdata->tailsz = PAGE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];
                size_t n = PAGE_SIZE;

                if (len >= PAGE_SIZE) {
                        len -= PAGE_SIZE;
                } else if (len > 0) {
                        /* enough for partial page, fill and zero the rest */
                        zero_user(page, len, PAGE_SIZE - len);
                        n = rdata->tailsz = len;
                        len = 0;
                } else if (page->index > eof_index) {
                        /*
                         * The VFS will not try to do readahead past the
                         * i_size, but it's possible that we have outstanding
                         * writes with gaps in the middle and the i_size hasn't
                         * caught up yet. Populate those with zeroed out pages
                         * to prevent the VFS from repeatedly attempting to
                         * fill them until the writes are flushed.
                         */
                        zero_user(page, 0, PAGE_SIZE);
                        lru_cache_add_file(page);
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                        unlock_page(page);
                        put_page(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                } else {
                        /* no need to hold page hostage */
                        lru_cache_add_file(page);
                        unlock_page(page);
                        put_page(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                }

                result = cifs_read_page_from_socket(server, page, n);
                if (result < 0)
                        break;

                rdata->got_bytes += result;
        }

        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
                                                rdata->got_bytes : result;
}

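/*
 * Pull a run of contiguous pages off the readahead list and add them to
 * the page cache, stopping at an index discontinuity or once the request
 * would exceed rsize. The pages are moved onto @tmplist and the offset
 * and length of the resulting read are returned via @offset and @bytes.
 */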
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
                    unsigned int rsize, struct list_head *tmplist,
                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
        struct page *page, *tpage;
        unsigned int expected_index;
        int rc;
        gfp_t gfp = readahead_gfp_mask(mapping);

        INIT_LIST_HEAD(tmplist);

        page = list_entry(page_list->prev, struct page, lru);

        /*
         * Lock the page and put it in the cache. Since no one else
         * should have access to this page, we're safe to simply set
         * PG_locked without checking it first.
         */
        __SetPageLocked(page);
        rc = add_to_page_cache_locked(page, mapping,
                                      page->index, gfp);

        /* give up if we can't stick it in the cache */
        if (rc) {
                __ClearPageLocked(page);
                return rc;
        }

        /* move first page to the tmplist */
        *offset = (loff_t)page->index << PAGE_SHIFT;
        *bytes = PAGE_SIZE;
        *nr_pages = 1;
        list_move_tail(&page->lru, tmplist);

        /* now try to add more pages onto the request */
        expected_index = page->index + 1;
        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
                /* discontinuity? */
                if (page->index != expected_index)
                        break;

                /* would this page push the read over the rsize? */
                if (*bytes + PAGE_SIZE > rsize)
                        break;

                __SetPageLocked(page);
                if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
                        __ClearPageLocked(page);
                        break;
                }
                list_move_tail(&page->lru, tmplist);
                (*bytes) += PAGE_SIZE;
                expected_index++;
                (*nr_pages)++;
        }
        return rc;
}

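/*
 * ->readpages() for cifs: satisfy as much of the readahead request as
 * possible from fscache, then batch the remaining pages into rsize-sized
 * async read requests.
 */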
static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct list_head *page_list, unsigned num_pages)
{
        int rc;
        struct list_head tmplist;
        struct cifsFileInfo *open_file = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
        struct TCP_Server_Info *server;
        pid_t pid;

        /*
         * Reads as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative.
         *
         * After this point, every page in the list might have PG_fscache set,
         * so we will need to clean that up on every page we don't use.
         */
        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
                                         &num_pages);
        if (rc == 0)
                return rc;

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        rc = 0;
        server = tlink_tcon(open_file->tlink)->ses->server;

        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
                 __func__, file, mapping, num_pages);

        /*
         * Start with the page at the end of the list and move it to the
         * private list. Do the same with any following pages until we hit
         * the rsize limit, hit an index discontinuity, or run out of
         * pages. Issue the async read and then start the loop again
         * until the list is empty.
         *
         * Note that list order is important. The page_list is in
         * the order of declining indexes. When we put the pages in
         * the rdata->pages, then we want them in increasing order.
         */
        while (!list_empty(page_list)) {
                unsigned int i, nr_pages, bytes, rsize;
                loff_t offset;
                struct page *page, *tpage;
                struct cifs_readdata *rdata;
                unsigned credits;

                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
                                                   &rsize, &credits);
                if (rc)
                        break;

                /*
                 * Give up immediately if rsize is too small to read an entire
                 * page. The VFS will fall back to readpage. However, we should
                 * never reach this point since we set ra_pages to 0 when the
                 * rsize is smaller than a cache page.
                 */
                if (unlikely(rsize < PAGE_SIZE)) {
                        add_credits_and_wake_if(server, credits, 0);
                        return 0;
                }

                rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
                                         &nr_pages, &offset, &bytes);
                if (rc) {
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
                if (!rdata) {
                        /* best to give up if we're out of mem */
                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                                list_del(&page->lru);
                                lru_cache_add_file(page);
                                unlock_page(page);
                                put_page(page);
                        }
                        rc = -ENOMEM;
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata->cfile = cifsFileInfo_get(open_file);
                rdata->mapping = mapping;
                rdata->offset = offset;
                rdata->bytes = bytes;
                rdata->pid = pid;
                rdata->pagesz = PAGE_SIZE;
                rdata->read_into_pages = cifs_readpages_read_into_pages;
                rdata->credits = credits;

                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                        list_del(&page->lru);
                        rdata->pages[rdata->nr_pages++] = page;
                }

                if (!rdata->cfile->invalidHandle ||
                    !cifs_reopen_file(rdata->cfile, true))
                        rc = server->ops->async_readv(rdata);
                if (rc) {
                        add_credits_and_wake_if(server, rdata->credits, 0);
                        for (i = 0; i < rdata->nr_pages; i++) {
                                page = rdata->pages[i];
                                lru_cache_add_file(page);
                                unlock_page(page);
                                put_page(page);
                        }
                        /* fall back to readpage in error/reconnect cases */
                        kref_put(&rdata->refcount, cifs_readdata_release);
                        break;
                }

                kref_put(&rdata->refcount, cifs_readdata_release);
        }

        /*
         * Any pages that have been shown to fscache but didn't get added to
         * the pagecache must be uncached before they get returned to the
         * allocator.
         */
        cifs_fscache_readpages_cancel(mapping->host, page_list);
        return rc;
}

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
        loff_t *poffset)
{
        char *read_data;
        int rc;

        /* Is the page cached? */
        rc = cifs_readpage_from_fscache(file_inode(file), page);
        if (rc == 0)
                goto read_complete;

        read_data = kmap(page);
        /* for reads over a certain size we could initiate async read ahead */

        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

        if (rc < 0)
                goto io_error;
        else
                cifs_dbg(FYI, "Bytes read %d\n", rc);

        file_inode(file)->i_atime =
                current_time(file_inode(file));

        if (PAGE_SIZE > rc)
                memset(read_data + rc, 0, PAGE_SIZE - rc);

        flush_dcache_page(page);
        SetPageUptodate(page);

        /* send this page to the cache */
        cifs_readpage_to_fscache(file_inode(file), page);

        rc = 0;

io_error:
        kunmap(page);
        unlock_page(page);

read_complete:
        return rc;
}

static int cifs_readpage(struct file *file, struct page *page)
{
        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
        int rc = -EACCES;
        unsigned int xid;

        xid = get_xid();

        if (file->private_data == NULL) {
                rc = -EBADF;
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
                 page, (int)offset, (int)offset);

        rc = cifs_readpage_worker(file, page, &offset);

        free_xid(xid);
        return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
        struct cifsFileInfo *open_file;
        struct cifs_tcon *tcon =
                cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));

        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        spin_unlock(&tcon->open_file_lock);
                        return 1;
                }
        }
        spin_unlock(&tcon->open_file_lock);
        return 0;
}

/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider allowing the inode to be refreshed only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
        if (!cifsInode)
                return true;

        if (is_inode_writable(cifsInode)) {
                /* This inode is open for write at least once */
                struct cifs_sb_info *cifs_sb;

                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /* no page cache to corrupt on direct I/O,
                           so we can change the size safely */
                        return true;
                }

                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
                        return true;

                return false;
        } else
                return true;
}

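/*
 * ->write_begin() for cifs: grab (and, if necessary, read in) the page
 * the write will land in. The read from the server is skipped when the
 * page is already up to date, when the write covers the whole page, or
 * when a read oplock lets us zero-fill the parts we are not writing.
 */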
static int cifs_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned flags,
                        struct page **pagep, void **fsdata)
{
        int oncethru = 0;
        pgoff_t index = pos >> PAGE_SHIFT;
        loff_t offset = pos & (PAGE_SIZE - 1);
        loff_t page_start = pos & PAGE_MASK;
        loff_t i_size;
        struct page *page;
        int rc = 0;

        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                rc = -ENOMEM;
                goto out;
        }

        if (PageUptodate(page))
                goto out;

        /*
         * If we write a full page it will be up to date, no need to read from
         * the server. If the write is short, we'll end up doing a sync write
         * instead.
         */
        if (len == PAGE_SIZE)
                goto out;

        /*
         * optimize away the read when we have an oplock, and we're not
         * expecting to use any of the data we'd be reading in. That
         * is, when the page lies beyond the EOF, or straddles the EOF
         * and the write will cover all of the existing data.
         */
        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
                i_size = i_size_read(mapping->host);
                if (page_start >= i_size ||
                    (offset == 0 && (pos + len) >= i_size)) {
                        zero_user_segments(page, 0, offset,
                                           offset + len,
                                           PAGE_SIZE);
                        /*
                         * PageChecked means that the parts of the page
                         * to which we're not writing are considered up
                         * to date. Once the data is copied to the
                         * page, it can be set uptodate.
                         */
                        SetPageChecked(page);
                        goto out;
                }
        }

        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
                /*
                 * might as well read a page, it is fast enough. If we get
                 * an error, we don't need to return it. cifs_write_end will
                 * do a sync write instead since PG_uptodate isn't set.
                 */
                cifs_readpage_worker(file, page, &page_start);
                put_page(page);
                oncethru = 1;
                goto start;
        } else {
                /*
                 * We could try using another file handle if there is one,
                 * but how would we lock it to prevent a close of that handle
                 * racing with this read? In any case this will be written
                 * out by write_end, so it is fine.
                 */
        }
out:
        *pagep = page;
        return rc;
}

static int cifs_release_page(struct page *page, gfp_t gfp)
{
        if (PagePrivate(page))
                return 0;

        return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
                                 unsigned int length)
{
        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

        if (offset == 0 && length == PAGE_SIZE)
                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
        int rc = 0;
        loff_t range_start = page_offset(page);
        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = 0,
                .range_start = range_start,
                .range_end = range_end,
        };

        cifs_dbg(FYI, "Launder page: %p\n", page);

        if (clear_page_dirty_for_io(page))
                rc = cifs_writepage_locked(page, &wbc);

        cifs_fscache_invalidate_page(page, page->mapping->host);
        return rc;
}

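/*
 * Work handler for oplock breaks sent by the server: wait for pending
 * writers, downgrade the cached oplock state, flush (and, if the read
 * cache is lost, invalidate) cached data, push cached byte-range locks
 * to the server, and acknowledge the break unless it was cancelled.
 */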
void cifs_oplock_break(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
        struct inode *inode = d_inode(cfile->dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        int rc = 0;

        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
                        TASK_UNINTERRUPTIBLE);

        server->ops->downgrade_oplock(server, cinode,
                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
                         inode);
                cinode->oplock = 0;
        }

        if (inode && S_ISREG(inode->i_mode)) {
                if (CIFS_CACHE_READ(cinode))
                        break_lease(inode, O_RDONLY);
                else
                        break_lease(inode, O_WRONLY);
                rc = filemap_fdatawrite(inode->i_mapping);
                if (!CIFS_CACHE_READ(cinode)) {
                        rc = filemap_fdatawait(inode->i_mapping);
                        mapping_set_error(inode->i_mapping, rc);
                        cifs_zap_mapping(inode);
                }
                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
        }

        rc = cifs_push_locks(cfile);
        if (rc)
                cifs_dbg(VFS, "Push locks rc = %d\n", rc);

        /*
         * Releasing a stale oplock after a recent reconnect of the SMB
         * session using a now incorrect file handle is not a data integrity
         * issue, but do not bother sending an oplock release if the session
         * to the server is still disconnected, since the oplock has already
         * been released by the server in that case.
         */
        if (!cfile->oplock_break_cancelled) {
                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
                                                             cinode);
                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
        }
        cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
        /*
         * FIXME
         * Eventually need to support direct IO for non forcedirectio mounts
         */
        return -EINVAL;
}

const struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .direct_IO = cifs_direct_io,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
        .readpage = cifs_readpage,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};