/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"


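/*
 * Convert VFS open flags (the O_ACCMODE bits) into the desired access
 * mask for an NT-style open; the mask returned at the end is a fallback
 * for unexpected access mode values.
 */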
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request;
                   it can cause unnecessary access denied on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

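/*
 * Convert VFS open flags to the SMB_O_* flags used by the POSIX
 * extensions' open/create call.
 */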
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cFYI(1, "Application %s pid %d has incorrectly set O_EXCL flag "
                        "but not O_CREAT on file open. Ignoring O_EXCL",
                        current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

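/*
 * Derive the CIFS create disposition from the O_CREAT/O_EXCL/O_TRUNC
 * combination (see the open flag mapping table in cifs_nt_open below).
 */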
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

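/*
 * Open (or create) a file via the POSIX extensions. On success the netfid
 * and oplock are returned through pnetfid/poplock; if pinode is non-NULL
 * the inode is instantiated or refreshed from the returned
 * FILE_UNIX_BASIC_INFO.
 */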
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cFYI(1, "posix open %s", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

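/*
 * Open a file with a regular NT-style open and then refresh the inode
 * from the server, using the FILE_ALL_INFO buffer returned with the open
 * response when the unix extensions are not in use.
 */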
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, fid, oplock, buf,
                               cifs_sb);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

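/*
 * Return true if any open file instance on this inode holds cached
 * mandatory byte-range locks.
 */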
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

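/*
 * Build the per-open cifsFileInfo, link it into the inode's and tcon's
 * open file lists, and apply the effective oplock level (possibly
 * downgraded by a pending open or by existing mandatory brlocks).
 */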
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (oplock == server->vals->oplock_read &&
                                                cifs_has_mand_locks(cinode)) {
                cFYI(1, "Reset oplock val from read to None due to mand locks");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                        cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix open succeeded");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cERROR(1, "server %s of type %s returned"
                                           " unexpected error on SMB posix open"
                                           ", disabling posix open support."
                                           " Check if server update available.",
                                           tcon->ses->serverName,
                                           tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - we need write access */
        down_write(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to push them */
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_write(&cinode->lock_sem);
        return rc;
}

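/*
 * Reopen a file whose handle was invalidated (e.g. after the session was
 * reconnected). If can_flush is set, cached dirty data is written out and
 * the inode is refreshed before the fid, oplock and byte-range locks are
 * restored.
 */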
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing writepage
         * to get called, and if the server was down that means we end up here;
         * we can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
             full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix reopen succeeded");
                        goto reopen_success;
                }
                /*
                 * Fall through to retry open the old way on errors; especially
                 * in the reconnect path it is important to retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with the returned buf,
         * since the file might have write behind data that needs to be flushed
         * and the server version of the file size can be stale. If we knew for
         * sure that the inode was not dirty locally we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cFYI(1, "cifs_reopen returned 0x%x", rc);
                cFYI(1, "oplock: %d", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data
         * that would invalidate the current end of file on the server we can
         * not go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cFYI(1, "Closedir inode = 0x%p", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cFYI(1, "Freeing private data in close dir");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cFYI(1, "Closing uncompleted readdir with rc %d", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cFYI(1, "closedir free smb buf in srch struct");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

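/* Allocate and initialize a byte-range lock record for the current tgid. */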
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * send the request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send the request to the server;
 * 2) 1, if no locks prevent us but we need to send the request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * send the request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send the request to the server;
 * 2) 1, if we need to send the request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

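/*
 * Push all cached byte-range locks for this open file to the server,
 * packing as many LOCKING_ANDX ranges into each request as maxBuf allows.
 */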
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        unlock_flocks();

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cERROR(1, "Can't push all brlocks!");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        unlock_flocks();

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - we need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

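/*
 * Decode a file_lock into the server's lock type bits and report whether
 * this is a lock or an unlock request and whether the caller should wait
 * (blocking lock).
 */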
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cFYI(1, "Posix");
        if (flock->fl_flags & FL_FLOCK)
                cFYI(1, "Flock");
        if (flock->fl_flags & FL_SLEEP) {
                cFYI(1, "Blocking lock");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cFYI(1, "Process suspended by mandatory locking - "
                        "not implemented yet");
        if (flock->fl_flags & FL_LEASE)
                cFYI(1, "Lease on file - not implemented yet");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cFYI(1, "F_WRLCK ");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cFYI(1, "F_UNLCK");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cFYI(1, "F_RDLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cFYI(1, "F_EXLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cFYI(1, "F_SHLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cFYI(1, "Unknown type of lock");
}

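/*
 * Handle a lock test (F_GETLK style) request: check cached locks first
 * and, where necessary, probe the server with a lock/unlock pair to
 * detect a conflicting lock.
 */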
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

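/*
 * Unlock a range: locks only cached locally are simply dropped, while
 * locks known to the server are batched into unlock requests; on failure
 * the affected lock records are restored to the file's list.
 */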
1346 int
1347 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1348                   unsigned int xid)
1349 {
1350         int rc = 0, stored_rc;
1351         int types[] = {LOCKING_ANDX_LARGE_FILES,
1352                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1353         unsigned int i;
1354         unsigned int max_num, num, max_buf;
1355         LOCKING_ANDX_RANGE *buf, *cur;
1356         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1357         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1358         struct cifsLockInfo *li, *tmp;
1359         __u64 length = 1 + flock->fl_end - flock->fl_start;
1360         struct list_head tmp_llist;
1361
1362         INIT_LIST_HEAD(&tmp_llist);
1363
1364         /*
1365          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1366          * and check it for zero before using.
1367          */
1368         max_buf = tcon->ses->server->maxBuf;
1369         if (!max_buf)
1370                 return -EINVAL;
1371
1372         max_num = (max_buf - sizeof(struct smb_hdr)) /
1373                                                 sizeof(LOCKING_ANDX_RANGE);
1374         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1375         if (!buf)
1376                 return -ENOMEM;
1377
1378         down_write(&cinode->lock_sem);
1379         for (i = 0; i < 2; i++) {
1380                 cur = buf;
1381                 num = 0;
1382                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1383                         if (flock->fl_start > li->offset ||
1384                             (flock->fl_start + length) <
1385                             (li->offset + li->length))
1386                                 continue;
1387                         if (current->tgid != li->pid)
1388                                 continue;
1389                         if (types[i] != li->type)
1390                                 continue;
1391                         if (cinode->can_cache_brlcks) {
1392                                 /*
1393                                  * We can cache brlock requests - simply remove
1394                                  * a lock from the file's list.
1395                                  */
1396                                 list_del(&li->llist);
1397                                 cifs_del_lock_waiters(li);
1398                                 kfree(li);
1399                                 continue;
1400                         }
1401                         cur->Pid = cpu_to_le16(li->pid);
1402                         cur->LengthLow = cpu_to_le32((u32)li->length);
1403                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1404                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1405                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1406                         /*
1407                          * We need to save a lock here to let us add it again to
1408                          * the file's list if the unlock range request fails on
1409                          * the server.
1410                          */
1411                         list_move(&li->llist, &tmp_llist);
1412                         if (++num == max_num) {
1413                                 stored_rc = cifs_lockv(xid, tcon,
1414                                                        cfile->fid.netfid,
1415                                                        li->type, num, 0, buf);
1416                                 if (stored_rc) {
1417                                         /*
1418                                          * We failed on the unlock range
1419                                          * request - add all locks from the tmp
1420                                          * list to the head of the file's list.
1421                                          */
1422                                         cifs_move_llist(&tmp_llist,
1423                                                         &cfile->llist->locks);
1424                                         rc = stored_rc;
1425                                 } else
1426                                         /*
1427                          * The unlock range request succeeded -
1428                                          * free the tmp list.
1429                                          */
1430                                         cifs_free_llist(&tmp_llist);
1431                                 cur = buf;
1432                                 num = 0;
1433                         } else
1434                                 cur++;
1435                 }
1436                 if (num) {
1437                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1438                                                types[i], num, 0, buf);
1439                         if (stored_rc) {
1440                                 cifs_move_llist(&tmp_llist,
1441                                                 &cfile->llist->locks);
1442                                 rc = stored_rc;
1443                         } else
1444                                 cifs_free_llist(&tmp_llist);
1445                 }
1446         }
1447
1448         up_write(&cinode->lock_sem);
1449         kfree(buf);
1450         return rc;
1451 }
1452
1453 static int
1454 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1455            bool wait_flag, bool posix_lck, int lock, int unlock,
1456            unsigned int xid)
1457 {
1458         int rc = 0;
1459         __u64 length = 1 + flock->fl_end - flock->fl_start;
1460         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1461         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1462         struct TCP_Server_Info *server = tcon->ses->server;
1463         struct inode *inode = cfile->dentry->d_inode;
1464
1465         if (posix_lck) {
1466                 int posix_lock_type;
1467
1468                 rc = cifs_posix_lock_set(file, flock);
1469                 if (rc <= 0)
1470                         return rc;
1471
1472                 if (type & server->vals->shared_lock_type)
1473                         posix_lock_type = CIFS_RDLCK;
1474                 else
1475                         posix_lock_type = CIFS_WRLCK;
1476
1477                 if (unlock == 1)
1478                         posix_lock_type = CIFS_UNLCK;
1479
1480                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1481                                       current->tgid, flock->fl_start, length,
1482                                       NULL, posix_lock_type, wait_flag);
1483                 goto out;
1484         }
1485
1486         if (lock) {
1487                 struct cifsLockInfo *lock;
1488
1489                 lock = cifs_lock_init(flock->fl_start, length, type);
1490                 if (!lock)
1491                         return -ENOMEM;
1492
1493                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1494                 if (rc < 0) {
1495                         kfree(lock);
1496                         return rc;
1497                 }
1498                 if (!rc)
1499                         goto out;
1500
1501                 /*
1502                  * Windows 7 server can delay breaking lease from read to None
1503                  * if we set a byte-range lock on a file - break it explicitly
1504                  * before sending the lock to the server to be sure the next
1505                  * read won't conflict with non-overlapping locks, since
1506                  * reads are performed in page-sized chunks.
1507                  */
1508                 if (!CIFS_I(inode)->clientCanCacheAll &&
1509                                         CIFS_I(inode)->clientCanCacheRead) {
1510                         cifs_invalidate_mapping(inode);
1511                         cFYI(1, "Set no oplock for inode=%p due to mand locks",
1512                              inode);
1513                         CIFS_I(inode)->clientCanCacheRead = false;
1514                 }
1515
1516                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1517                                             type, 1, 0, wait_flag);
1518                 if (rc) {
1519                         kfree(lock);
1520                         return rc;
1521                 }
1522
1523                 cifs_lock_add(cfile, lock);
1524         } else if (unlock)
1525                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1526
1527 out:
1528         if (flock->fl_flags & FL_POSIX)
1529                 posix_lock_file_wait(file, flock);
1530         return rc;
1531 }
1532
1533 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1534 {
1535         int rc, xid;
1536         int lock = 0, unlock = 0;
1537         bool wait_flag = false;
1538         bool posix_lck = false;
1539         struct cifs_sb_info *cifs_sb;
1540         struct cifs_tcon *tcon;
1541         struct cifsInodeInfo *cinode;
1542         struct cifsFileInfo *cfile;
1543         __u16 netfid;
1544         __u32 type;
1545
1546         rc = -EACCES;
1547         xid = get_xid();
1548
1549         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1550                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1551                 flock->fl_start, flock->fl_end);
1552
1553         cfile = (struct cifsFileInfo *)file->private_data;
1554         tcon = tlink_tcon(cfile->tlink);
1555
1556         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1557                         tcon->ses->server);
1558
1559         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1560         netfid = cfile->fid.netfid;
1561         cinode = CIFS_I(file_inode(file));
1562
1563         if (cap_unix(tcon->ses) &&
1564             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1565             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1566                 posix_lck = true;
1567         /*
1568          * BB add code here to normalize offset and length to account for
1569          * negative length, which we cannot accept over the wire.
1570          */
1571         if (IS_GETLK(cmd)) {
1572                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1573                 free_xid(xid);
1574                 return rc;
1575         }
1576
1577         if (!lock && !unlock) {
1578                 /*
1579                  * if this is neither a lock nor an unlock request, there
1580                  * is nothing to do since we do not know what it is
1581                  */
1582                 free_xid(xid);
1583                 return -EOPNOTSUPP;
1584         }
1585
1586         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1587                         xid);
1588         free_xid(xid);
1589         return rc;
1590 }
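
     /*
      * For illustration: cifs_lock() is wired up as the VFS ->lock file
      * operation for cifs files, so a typical path here is a userspace
      * byte-range lock request, e.g. fcntl(fd, F_SETLKW, &fl) on a file
      * opened from a cifs mount.
      */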
1591
1592 /*
1593  * Update the file size (if needed) after a write. Should be called with
1594  * the inode->i_lock held.
1595  */
1596 void
1597 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1598                       unsigned int bytes_written)
1599 {
1600         loff_t end_of_write = offset + bytes_written;
1601
1602         if (end_of_write > cifsi->server_eof)
1603                 cifsi->server_eof = end_of_write;
1604 }
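
     /*
      * A minimal usage sketch (mirroring the call site in cifs_write()
      * below) - the caller must take i_lock around the update:
      *
      *      spin_lock(&dentry->d_inode->i_lock);
      *      cifs_update_eof(cifsi, offset, bytes_written);
      *      spin_unlock(&dentry->d_inode->i_lock);
      */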
1605
1606 static ssize_t
1607 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1608            size_t write_size, loff_t *offset)
1609 {
1610         int rc = 0;
1611         unsigned int bytes_written = 0;
1612         unsigned int total_written;
1613         struct cifs_sb_info *cifs_sb;
1614         struct cifs_tcon *tcon;
1615         struct TCP_Server_Info *server;
1616         unsigned int xid;
1617         struct dentry *dentry = open_file->dentry;
1618         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1619         struct cifs_io_parms io_parms;
1620
1621         cifs_sb = CIFS_SB(dentry->d_sb);
1622
1623         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1624              *offset, dentry->d_name.name);
1625
1626         tcon = tlink_tcon(open_file->tlink);
1627         server = tcon->ses->server;
1628
1629         if (!server->ops->sync_write)
1630                 return -ENOSYS;
1631
1632         xid = get_xid();
1633
1634         for (total_written = 0; write_size > total_written;
1635              total_written += bytes_written) {
1636                 rc = -EAGAIN;
1637                 while (rc == -EAGAIN) {
1638                         struct kvec iov[2];
1639                         unsigned int len;
1640
1641                         if (open_file->invalidHandle) {
1642                                 /* we could deadlock if we called
1643                                    filemap_fdatawait from here so tell
1644                                    cifs_reopen_file not to flush data to
1645                                    the server now */
1646                                 rc = cifs_reopen_file(open_file, false);
1647                                 if (rc != 0)
1648                                         break;
1649                         }
1650
1651                         len = min((size_t)cifs_sb->wsize,
1652                                   write_size - total_written);
1653                         /* iov[0] is reserved for smb header */
1654                         iov[1].iov_base = (char *)write_data + total_written;
1655                         iov[1].iov_len = len;
1656                         io_parms.pid = pid;
1657                         io_parms.tcon = tcon;
1658                         io_parms.offset = *offset;
1659                         io_parms.length = len;
1660                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1661                                                      &bytes_written, iov, 1);
1662                 }
1663                 if (rc || (bytes_written == 0)) {
1664                         if (total_written)
1665                                 break;
1666                         else {
1667                                 free_xid(xid);
1668                                 return rc;
1669                         }
1670                 } else {
1671                         spin_lock(&dentry->d_inode->i_lock);
1672                         cifs_update_eof(cifsi, *offset, bytes_written);
1673                         spin_unlock(&dentry->d_inode->i_lock);
1674                         *offset += bytes_written;
1675                 }
1676         }
1677
1678         cifs_stats_bytes_written(tcon, total_written);
1679
1680         if (total_written > 0) {
1681                 spin_lock(&dentry->d_inode->i_lock);
1682                 if (*offset > dentry->d_inode->i_size)
1683                         i_size_write(dentry->d_inode, *offset);
1684                 spin_unlock(&dentry->d_inode->i_lock);
1685         }
1686         mark_inode_dirty_sync(dentry->d_inode);
1687         free_xid(xid);
1688         return total_written;
1689 }
1690
1691 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1692                                         bool fsuid_only)
1693 {
1694         struct cifsFileInfo *open_file = NULL;
1695         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1696
1697         /* only filter by fsuid on multiuser mounts */
1698         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1699                 fsuid_only = false;
1700
1701         spin_lock(&cifs_file_list_lock);
1702         /* we could simply get the first list entry since write-only entries
1703            are always at the end of the list but since the first entry might
1704            have a close pending, we go through the whole list */
1705         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1706                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1707                         continue;
1708                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1709                         if (!open_file->invalidHandle) {
1710                                 /* found a good file */
1711                                 /* lock it so it will not be closed on us */
1712                                 cifsFileInfo_get_locked(open_file);
1713                                 spin_unlock(&cifs_file_list_lock);
1714                                 return open_file;
1715                         } /* else might as well continue, and look for
1716                              another, or simply have the caller reopen it
1717                              again rather than trying to fix this handle */
1718                 } else /* write only file */
1719                         break; /* write only files are last so must be done */
1720         }
1721         spin_unlock(&cifs_file_list_lock);
1722         return NULL;
1723 }
1724
1725 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1726                                         bool fsuid_only)
1727 {
1728         struct cifsFileInfo *open_file, *inv_file = NULL;
1729         struct cifs_sb_info *cifs_sb;
1730         bool any_available = false;
1731         int rc;
1732         unsigned int refind = 0;
1733
1734         /* Having a null inode here (because mapping->host was set to zero by
1735         the VFS or MM) should not happen but we had reports of an oops (due to
1736         it being zero) during stress test cases so we need to check for it */
1737
1738         if (cifs_inode == NULL) {
1739                 cERROR(1, "Null inode passed to find_writable_file");
1740                 dump_stack();
1741                 return NULL;
1742         }
1743
1744         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1745
1746         /* only filter by fsuid on multiuser mounts */
1747         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1748                 fsuid_only = false;
1749
1750         spin_lock(&cifs_file_list_lock);
1751 refind_writable:
1752         if (refind > MAX_REOPEN_ATT) {
1753                 spin_unlock(&cifs_file_list_lock);
1754                 return NULL;
1755         }
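             /*
              * First pass prefers handles opened by the caller's own tgid; if
              * none is found, any_available is set below and we retry,
              * accepting a handle from any pid.
              */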
1756         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1757                 if (!any_available && open_file->pid != current->tgid)
1758                         continue;
1759                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1760                         continue;
1761                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1762                         if (!open_file->invalidHandle) {
1763                                 /* found a good writable file */
1764                                 cifsFileInfo_get_locked(open_file);
1765                                 spin_unlock(&cifs_file_list_lock);
1766                                 return open_file;
1767                         } else {
1768                                 if (!inv_file)
1769                                         inv_file = open_file;
1770                         }
1771                 }
1772         }
1773         /* couldn't find a usable FH with the same pid, try any available */
1774         if (!any_available) {
1775                 any_available = true;
1776                 goto refind_writable;
1777         }
1778
1779         if (inv_file) {
1780                 any_available = false;
1781                 cifsFileInfo_get_locked(inv_file);
1782         }
1783
1784         spin_unlock(&cifs_file_list_lock);
1785
1786         if (inv_file) {
1787                 rc = cifs_reopen_file(inv_file, false);
1788                 if (!rc)
1789                         return inv_file;
1790                 else {
1791                         spin_lock(&cifs_file_list_lock);
1792                         list_move_tail(&inv_file->flist,
1793                                         &cifs_inode->openFileList);
1794                         spin_unlock(&cifs_file_list_lock);
1795                         cifsFileInfo_put(inv_file);
1796                         spin_lock(&cifs_file_list_lock);
1797                         ++refind;
1798                         goto refind_writable;
1799                 }
1800         }
1801
1802         return NULL;
1803 }
1804
1805 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1806 {
1807         struct address_space *mapping = page->mapping;
1808         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1809         char *write_data;
1810         int rc = -EFAULT;
1811         int bytes_written = 0;
1812         struct inode *inode;
1813         struct cifsFileInfo *open_file;
1814
1815         if (!mapping || !mapping->host)
1816                 return -EFAULT;
1817
1818         inode = page->mapping->host;
1819
1820         offset += (loff_t)from;
1821         write_data = kmap(page);
1822         write_data += from;
1823
1824         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1825                 kunmap(page);
1826                 return -EIO;
1827         }
1828
1829         /* racing with truncate? */
1830         if (offset > mapping->host->i_size) {
1831                 kunmap(page);
1832                 return 0; /* don't care */
1833         }
1834
1835         /* check to make sure that we are not extending the file */
1836         if (mapping->host->i_size - offset < (loff_t)to)
1837                 to = (unsigned)(mapping->host->i_size - offset);
1838
1839         open_file = find_writable_file(CIFS_I(mapping->host), false);
1840         if (open_file) {
1841                 bytes_written = cifs_write(open_file, open_file->pid,
1842                                            write_data, to - from, &offset);
1843                 cifsFileInfo_put(open_file);
1844                 /* Does mm or vfs already set times? */
1845                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1846                 if ((bytes_written > 0) && (offset))
1847                         rc = 0;
1848                 else if (bytes_written < 0)
1849                         rc = bytes_written;
1850         } else {
1851                 cFYI(1, "No writeable filehandles for inode");
1852                 rc = -EIO;
1853         }
1854
1855         kunmap(page);
1856         return rc;
1857 }
1858
1859 static int cifs_writepages(struct address_space *mapping,
1860                            struct writeback_control *wbc)
1861 {
1862         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1863         bool done = false, scanned = false, range_whole = false;
1864         pgoff_t end, index;
1865         struct cifs_writedata *wdata;
1866         struct TCP_Server_Info *server;
1867         struct page *page;
1868         int rc = 0;
1869
1870         /*
1871          * If wsize is smaller than the page cache size, default to writing
1872          * one page at a time via cifs_writepage
1873          */
1874         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1875                 return generic_writepages(mapping, wbc);
1876
1877         if (wbc->range_cyclic) {
1878                 index = mapping->writeback_index; /* Start from prev offset */
1879                 end = -1;
1880         } else {
1881                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1882                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1883                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1884                         range_whole = true;
1885                 scanned = true;
1886         }
1887 retry:
1888         while (!done && index <= end) {
1889                 unsigned int i, nr_pages, found_pages;
1890                 pgoff_t next = 0, tofind;
1891                 struct page **pages;
1892
1893                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1894                                 end - index) + 1;
1895
1896                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1897                                              cifs_writev_complete);
1898                 if (!wdata) {
1899                         rc = -ENOMEM;
1900                         break;
1901                 }
1902
1903                 /*
1904                  * find_get_pages_tag seems to return a max of 256 on each
1905                  * iteration, so we must call it several times in order to
1906                  * fill the array or the wsize is effectively limited to
1907                  * 256 * PAGE_CACHE_SIZE.
1908                  */
1909                 found_pages = 0;
1910                 pages = wdata->pages;
1911                 do {
1912                         nr_pages = find_get_pages_tag(mapping, &index,
1913                                                         PAGECACHE_TAG_DIRTY,
1914                                                         tofind, pages);
1915                         found_pages += nr_pages;
1916                         tofind -= nr_pages;
1917                         pages += nr_pages;
1918                 } while (nr_pages && tofind && index <= end);
1919
1920                 if (found_pages == 0) {
1921                         kref_put(&wdata->refcount, cifs_writedata_release);
1922                         break;
1923                 }
1924
1925                 nr_pages = 0;
1926                 for (i = 0; i < found_pages; i++) {
1927                         page = wdata->pages[i];
1928                         /*
1929                          * At this point we hold neither mapping->tree_lock nor
1930                          * lock on the page itself: the page may be truncated or
1931                          * invalidated (changing page->mapping to NULL), or even
1932                          * swizzled back from swapper_space to tmpfs file
1933                          * mapping
1934                          */
1935
1936                         if (nr_pages == 0)
1937                                 lock_page(page);
1938                         else if (!trylock_page(page))
1939                                 break;
1940
1941                         if (unlikely(page->mapping != mapping)) {
1942                                 unlock_page(page);
1943                                 break;
1944                         }
1945
1946                         if (!wbc->range_cyclic && page->index > end) {
1947                                 done = true;
1948                                 unlock_page(page);
1949                                 break;
1950                         }
1951
1952                         if (next && (page->index != next)) {
1953                                 /* Not next consecutive page */
1954                                 unlock_page(page);
1955                                 break;
1956                         }
1957
1958                         if (wbc->sync_mode != WB_SYNC_NONE)
1959                                 wait_on_page_writeback(page);
1960
1961                         if (PageWriteback(page) ||
1962                                         !clear_page_dirty_for_io(page)) {
1963                                 unlock_page(page);
1964                                 break;
1965                         }
1966
1967                         /*
1968                          * This actually clears the dirty bit in the radix tree.
1969                          * See cifs_writepage() for more commentary.
1970                          */
1971                         set_page_writeback(page);
1972
1973                         if (page_offset(page) >= i_size_read(mapping->host)) {
1974                                 done = true;
1975                                 unlock_page(page);
1976                                 end_page_writeback(page);
1977                                 break;
1978                         }
1979
1980                         wdata->pages[i] = page;
1981                         next = page->index + 1;
1982                         ++nr_pages;
1983                 }
1984
1985                 /* reset index to refind any pages skipped */
1986                 if (nr_pages == 0)
1987                         index = wdata->pages[0]->index + 1;
1988
1989                 /* put any pages we aren't going to use */
1990                 for (i = nr_pages; i < found_pages; i++) {
1991                         page_cache_release(wdata->pages[i]);
1992                         wdata->pages[i] = NULL;
1993                 }
1994
1995                 /* nothing to write? */
1996                 if (nr_pages == 0) {
1997                         kref_put(&wdata->refcount, cifs_writedata_release);
1998                         continue;
1999                 }
2000
2001                 wdata->sync_mode = wbc->sync_mode;
2002                 wdata->nr_pages = nr_pages;
2003                 wdata->offset = page_offset(wdata->pages[0]);
2004                 wdata->pagesz = PAGE_CACHE_SIZE;
2005                 wdata->tailsz =
2006                         min(i_size_read(mapping->host) -
2007                             page_offset(wdata->pages[nr_pages - 1]),
2008                             (loff_t)PAGE_CACHE_SIZE);
2009                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2010                                         wdata->tailsz;
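                     /*
                      * Worked example (assuming 4k pages): with nr_pages == 3,
                      * pages starting at file offset 0 and i_size == 9000,
                      * tailsz = min(9000 - 8192, 4096) = 808 and
                      * bytes = 2 * 4096 + 808 = 9000.
                      */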
2011
2012                 do {
2013                         if (wdata->cfile != NULL)
2014                                 cifsFileInfo_put(wdata->cfile);
2015                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2016                                                           false);
2017                         if (!wdata->cfile) {
2018                                 cERROR(1, "No writable handles for inode");
2019                                 rc = -EBADF;
2020                                 break;
2021                         }
2022                         wdata->pid = wdata->cfile->pid;
2023                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2024                         rc = server->ops->async_writev(wdata);
2025                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2026
2027                 for (i = 0; i < nr_pages; ++i)
2028                         unlock_page(wdata->pages[i]);
2029
2030                 /* send failure -- clean up the mess */
2031                 if (rc != 0) {
2032                         for (i = 0; i < nr_pages; ++i) {
2033                                 if (rc == -EAGAIN)
2034                                         redirty_page_for_writepage(wbc,
2035                                                            wdata->pages[i]);
2036                                 else
2037                                         SetPageError(wdata->pages[i]);
2038                                 end_page_writeback(wdata->pages[i]);
2039                                 page_cache_release(wdata->pages[i]);
2040                         }
2041                         if (rc != -EAGAIN)
2042                                 mapping_set_error(mapping, rc);
2043                 }
2044                 kref_put(&wdata->refcount, cifs_writedata_release);
2045
2046                 wbc->nr_to_write -= nr_pages;
2047                 if (wbc->nr_to_write <= 0)
2048                         done = true;
2049
2050                 index = next;
2051         }
2052
2053         if (!scanned && !done) {
2054                 /*
2055                  * We hit the last page and there is more work to be done: wrap
2056                  * back to the start of the file
2057                  */
2058                 scanned = true;
2059                 index = 0;
2060                 goto retry;
2061         }
2062
2063         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2064                 mapping->writeback_index = index;
2065
2066         return rc;
2067 }
2068
2069 static int
2070 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2071 {
2072         int rc;
2073         unsigned int xid;
2074
2075         xid = get_xid();
2076 /* BB add check for wbc flags */
2077         page_cache_get(page);
2078         if (!PageUptodate(page))
2079                 cFYI(1, "ppw - page not up to date");
2080
2081         /*
2082          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2083          *
2084          * A writepage() implementation always needs to do either this,
2085          * or re-dirty the page with "redirty_page_for_writepage()" in
2086          * the case of a failure.
2087          *
2088          * Just unlocking the page will cause the radix tree tag-bits
2089          * to fail to update with the state of the page correctly.
2090          */
2091         set_page_writeback(page);
2092 retry_write:
2093         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2094         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2095                 goto retry_write;
2096         else if (rc == -EAGAIN)
2097                 redirty_page_for_writepage(wbc, page);
2098         else if (rc != 0)
2099                 SetPageError(page);
2100         else
2101                 SetPageUptodate(page);
2102         end_page_writeback(page);
2103         page_cache_release(page);
2104         free_xid(xid);
2105         return rc;
2106 }
2107
2108 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2109 {
2110         int rc = cifs_writepage_locked(page, wbc);
2111         unlock_page(page);
2112         return rc;
2113 }
2114
2115 static int cifs_write_end(struct file *file, struct address_space *mapping,
2116                         loff_t pos, unsigned len, unsigned copied,
2117                         struct page *page, void *fsdata)
2118 {
2119         int rc;
2120         struct inode *inode = mapping->host;
2121         struct cifsFileInfo *cfile = file->private_data;
2122         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2123         __u32 pid;
2124
2125         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2126                 pid = cfile->pid;
2127         else
2128                 pid = current->tgid;
2129
2130         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2131                  page, pos, copied);
2132
2133         if (PageChecked(page)) {
2134                 if (copied == len)
2135                         SetPageUptodate(page);
2136                 ClearPageChecked(page);
2137         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2138                 SetPageUptodate(page);
2139
2140         if (!PageUptodate(page)) {
2141                 char *page_data;
2142                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2143                 unsigned int xid;
2144
2145                 xid = get_xid();
2146                 /* this is probably better than directly calling
2147                    cifs_partialpagewrite since in this function the file
2148                    handle is known, which we might as well leverage */
2149                 /* BB check if anything else is missing from ppw,
2150                    such as updating the last write time */
2151                 page_data = kmap(page);
2152                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2153                 /* if (rc < 0) should we set writebehind rc? */
2154                 kunmap(page);
2155
2156                 free_xid(xid);
2157         } else {
2158                 rc = copied;
2159                 pos += copied;
2160                 set_page_dirty(page);
2161         }
2162
2163         if (rc > 0) {
2164                 spin_lock(&inode->i_lock);
2165                 if (pos > inode->i_size)
2166                         i_size_write(inode, pos);
2167                 spin_unlock(&inode->i_lock);
2168         }
2169
2170         unlock_page(page);
2171         page_cache_release(page);
2172
2173         return rc;
2174 }
2175
2176 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2177                       int datasync)
2178 {
2179         unsigned int xid;
2180         int rc = 0;
2181         struct cifs_tcon *tcon;
2182         struct TCP_Server_Info *server;
2183         struct cifsFileInfo *smbfile = file->private_data;
2184         struct inode *inode = file_inode(file);
2185         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2186
2187         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2188         if (rc)
2189                 return rc;
2190         mutex_lock(&inode->i_mutex);
2191
2192         xid = get_xid();
2193
2194         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2195                 file->f_path.dentry->d_name.name, datasync);
2196
2197         if (!CIFS_I(inode)->clientCanCacheRead) {
2198                 rc = cifs_invalidate_mapping(inode);
2199                 if (rc) {
2200                         cFYI(1, "rc: %d during invalidate phase", rc);
2201                         rc = 0; /* don't care about it in fsync */
2202                 }
2203         }
2204
2205         tcon = tlink_tcon(smbfile->tlink);
2206         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2207                 server = tcon->ses->server;
2208                 if (server->ops->flush)
2209                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2210                 else
2211                         rc = -ENOSYS;
2212         }
2213
2214         free_xid(xid);
2215         mutex_unlock(&inode->i_mutex);
2216         return rc;
2217 }
2218
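     /*
      * Like cifs_strict_fsync() above, except that the cache invalidation
      * step is skipped here.
      */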
2219 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2220 {
2221         unsigned int xid;
2222         int rc = 0;
2223         struct cifs_tcon *tcon;
2224         struct TCP_Server_Info *server;
2225         struct cifsFileInfo *smbfile = file->private_data;
2226         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2227         struct inode *inode = file->f_mapping->host;
2228
2229         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2230         if (rc)
2231                 return rc;
2232         mutex_lock(&inode->i_mutex);
2233
2234         xid = get_xid();
2235
2236         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2237                 file->f_path.dentry->d_name.name, datasync);
2238
2239         tcon = tlink_tcon(smbfile->tlink);
2240         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2241                 server = tcon->ses->server;
2242                 if (server->ops->flush)
2243                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2244                 else
2245                         rc = -ENOSYS;
2246         }
2247
2248         free_xid(xid);
2249         mutex_unlock(&inode->i_mutex);
2250         return rc;
2251 }
2252
2253 /*
2254  * As the file closes, flush all cached write data for this inode,
2255  * checking for write-behind errors.
2256  */
2257 int cifs_flush(struct file *file, fl_owner_t id)
2258 {
2259         struct inode *inode = file_inode(file);
2260         int rc = 0;
2261
2262         if (file->f_mode & FMODE_WRITE)
2263                 rc = filemap_write_and_wait(inode->i_mapping);
2264
2265         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2266
2267         return rc;
2268 }
2269
2270 static int
2271 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2272 {
2273         int rc = 0;
2274         unsigned long i;
2275
2276         for (i = 0; i < num_pages; i++) {
2277                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2278                 if (!pages[i]) {
2279                         /*
2280                          * save the number of pages we have already allocated
2281                          * and return with an ENOMEM error
2282                          */
2283                         num_pages = i;
2284                         rc = -ENOMEM;
2285                         break;
2286                 }
2287         }
2288
2289         if (rc) {
2290                 for (i = 0; i < num_pages; i++)
2291                         put_page(pages[i]);
2292         }
2293         return rc;
2294 }
2295
2296 static inline
2297 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2298 {
2299         size_t num_pages;
2300         size_t clen;
2301
2302         clen = min_t(const size_t, len, wsize);
2303         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2304
2305         if (cur_len)
2306                 *cur_len = clen;
2307
2308         return num_pages;
2309 }
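
     /*
      * Worked example (assuming PAGE_SIZE == 4096): for wsize == 65536 and
      * len == 100000, clen = min(100000, 65536) = 65536, so *cur_len is set
      * to 65536 and DIV_ROUND_UP(65536, 4096) = 16 pages are needed.
      */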
2310
2311 static void
2312 cifs_uncached_writev_complete(struct work_struct *work)
2313 {
2314         int i;
2315         struct cifs_writedata *wdata = container_of(work,
2316                                         struct cifs_writedata, work);
2317         struct inode *inode = wdata->cfile->dentry->d_inode;
2318         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2319
2320         spin_lock(&inode->i_lock);
2321         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2322         if (cifsi->server_eof > inode->i_size)
2323                 i_size_write(inode, cifsi->server_eof);
2324         spin_unlock(&inode->i_lock);
2325
2326         complete(&wdata->done);
2327
2328         if (wdata->result != -EAGAIN) {
2329                 for (i = 0; i < wdata->nr_pages; i++)
2330                         put_page(wdata->pages[i]);
2331         }
2332
2333         kref_put(&wdata->refcount, cifs_writedata_release);
2334 }
2335
2336 /* attempt to send the write to the server, retrying on any -EAGAIN errors */
2337 static int
2338 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2339 {
2340         int rc;
2341         struct TCP_Server_Info *server;
2342
2343         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2344
2345         do {
2346                 if (wdata->cfile->invalidHandle) {
2347                         rc = cifs_reopen_file(wdata->cfile, false);
2348                         if (rc != 0)
2349                                 continue;
2350                 }
2351                 rc = server->ops->async_writev(wdata);
2352         } while (rc == -EAGAIN);
2353
2354         return rc;
2355 }
2356
2357 static ssize_t
2358 cifs_iovec_write(struct file *file, const struct iovec *iov,
2359                  unsigned long nr_segs, loff_t *poffset)
2360 {
2361         unsigned long nr_pages, i;
2362         size_t copied, len, cur_len;
2363         ssize_t total_written = 0;
2364         loff_t offset;
2365         struct iov_iter it;
2366         struct cifsFileInfo *open_file;
2367         struct cifs_tcon *tcon;
2368         struct cifs_sb_info *cifs_sb;
2369         struct cifs_writedata *wdata, *tmp;
2370         struct list_head wdata_list;
2371         int rc;
2372         pid_t pid;
2373
2374         len = iov_length(iov, nr_segs);
2375         if (!len)
2376                 return 0;
2377
2378         rc = generic_write_checks(file, poffset, &len, 0);
2379         if (rc)
2380                 return rc;
2381
2382         INIT_LIST_HEAD(&wdata_list);
2383         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2384         open_file = file->private_data;
2385         tcon = tlink_tcon(open_file->tlink);
2386
2387         if (!tcon->ses->server->ops->async_writev)
2388                 return -ENOSYS;
2389
2390         offset = *poffset;
2391
2392         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2393                 pid = open_file->pid;
2394         else
2395                 pid = current->tgid;
2396
2397         iov_iter_init(&it, iov, nr_segs, len, 0);
2398         do {
2399                 size_t save_len;
2400
2401                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2402                 wdata = cifs_writedata_alloc(nr_pages,
2403                                              cifs_uncached_writev_complete);
2404                 if (!wdata) {
2405                         rc = -ENOMEM;
2406                         break;
2407                 }
2408
2409                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2410                 if (rc) {
2411                         kfree(wdata);
2412                         break;
2413                 }
2414
2415                 save_len = cur_len;
2416                 for (i = 0; i < nr_pages; i++) {
2417                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2418                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2419                                                          0, copied);
2420                         cur_len -= copied;
2421                         iov_iter_advance(&it, copied);
2422                 }
2423                 cur_len = save_len - cur_len;
2424
2425                 wdata->sync_mode = WB_SYNC_ALL;
2426                 wdata->nr_pages = nr_pages;
2427                 wdata->offset = (__u64)offset;
2428                 wdata->cfile = cifsFileInfo_get(open_file);
2429                 wdata->pid = pid;
2430                 wdata->bytes = cur_len;
2431                 wdata->pagesz = PAGE_SIZE;
2432                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2433                 rc = cifs_uncached_retry_writev(wdata);
2434                 if (rc) {
2435                         kref_put(&wdata->refcount, cifs_writedata_release);
2436                         break;
2437                 }
2438
2439                 list_add_tail(&wdata->list, &wdata_list);
2440                 offset += cur_len;
2441                 len -= cur_len;
2442         } while (len > 0);
2443
2444         /*
2445          * If at least one write was successfully sent, then discard any rc
2446          * value from the later failed sends. If the writes that were sent
2447          * succeed, we end up returning however much was written. If one of
2448          * them fails, we get a new rc value from that.
2449          */
2450         if (!list_empty(&wdata_list))
2451                 rc = 0;
2452
2453         /*
2454          * Wait for and collect replies for any successful sends in order of
2455          * increasing offset. Once an error is hit or we get a fatal signal
2456          * while waiting, then return without waiting for any more replies.
2457          */
2458 restart_loop:
2459         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2460                 if (!rc) {
2461                         /* FIXME: freezable too? */
2462                         rc = wait_for_completion_killable(&wdata->done);
2463                         if (rc)
2464                                 rc = -EINTR;
2465                         else if (wdata->result)
2466                                 rc = wdata->result;
2467                         else
2468                                 total_written += wdata->bytes;
2469
2470                         /* resend call if it's a retryable error */
2471                         if (rc == -EAGAIN) {
2472                                 rc = cifs_uncached_retry_writev(wdata);
2473                                 goto restart_loop;
2474                         }
2475                 }
2476                 list_del_init(&wdata->list);
2477                 kref_put(&wdata->refcount, cifs_writedata_release);
2478         }
2479
2480         if (total_written > 0)
2481                 *poffset += total_written;
2482
2483         cifs_stats_bytes_written(tcon, total_written);
2484         return total_written ? total_written : (ssize_t)rc;
2485 }
2486
2487 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2488                                 unsigned long nr_segs, loff_t pos)
2489 {
2490         ssize_t written;
2491         struct inode *inode;
2492
2493         inode = file_inode(iocb->ki_filp);
2494
2495         /*
2496          * BB - optimize the case when signing is disabled. We can drop this
2497          * extra memory-to-memory copying and use iovec buffers to construct
2498          * the write request.
2499          */
2500
2501         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2502         if (written > 0) {
2503                 CIFS_I(inode)->invalid_mapping = true;
2504                 iocb->ki_pos = pos;
2505         }
2506
2507         return written;
2508 }
2509
2510 static ssize_t
2511 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2512             unsigned long nr_segs, loff_t pos)
2513 {
2514         struct file *file = iocb->ki_filp;
2515         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2516         struct inode *inode = file->f_mapping->host;
2517         struct cifsInodeInfo *cinode = CIFS_I(inode);
2518         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2519         ssize_t rc = -EACCES;
2520
2521         BUG_ON(iocb->ki_pos != pos);
2522
2523         sb_start_write(inode->i_sb);
2524
2525         /*
2526          * We need to hold the sem to be sure nobody modifies the lock list
2527          * with a brlock that prevents writing.
2528          */
2529         down_read(&cinode->lock_sem);
2530         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2531                                      server->vals->exclusive_lock_type, NULL,
2532                                      CIFS_WRITE_OP)) {
2533                 mutex_lock(&inode->i_mutex);
2534                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2535                                                &iocb->ki_pos);
2536                 mutex_unlock(&inode->i_mutex);
2537         }
2538
2539         if (rc > 0 || rc == -EIOCBQUEUED) {
2540                 ssize_t err;
2541
2542                 err = generic_write_sync(file, pos, rc);
2543                 if (err < 0 && rc > 0)
2544                         rc = err;
2545         }
2546
2547         up_read(&cinode->lock_sem);
2548         sb_end_write(inode->i_sb);
2549         return rc;
2550 }
2551
2552 ssize_t
2553 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2554                    unsigned long nr_segs, loff_t pos)
2555 {
2556         struct inode *inode = file_inode(iocb->ki_filp);
2557         struct cifsInodeInfo *cinode = CIFS_I(inode);
2558         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2559         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2560                                                 iocb->ki_filp->private_data;
2561         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2562         ssize_t written;
2563
2564         if (cinode->clientCanCacheAll) {
2565                 if (cap_unix(tcon->ses) &&
2566                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2567                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2568                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2569                 return cifs_writev(iocb, iov, nr_segs, pos);
2570         }
2571         /*
2572          * For non-oplocked files in strict cache mode we need to write the data
2573          * to the server exactly from pos to pos+len-1 rather than flush all
2574          * affected pages, because that may cause an error with mandatory locks
2575          * on these pages but not on the region from pos to pos+len-1.
2576          */
2577         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2578         if (written > 0 && cinode->clientCanCacheRead) {
2579                 /*
2580                  * Windows 7 server can delay breaking level2 oplock if a write
2581                  * request comes - break it on the client to prevent reading
2582                  * stale data.
2583                  */
2584                 cifs_invalidate_mapping(inode);
2585                 cFYI(1, "Set no oplock for inode=%p after a write operation",
2586                      inode);
2587                 cinode->clientCanCacheRead = false;
2588         }
2589         return written;
2590 }
2591
2592 static struct cifs_readdata *
2593 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2594 {
2595         struct cifs_readdata *rdata;
2596
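             /* a single allocation holds the struct plus its trailing array
                of nr_pages page pointers */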
2597         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2598                         GFP_KERNEL);
2599         if (rdata != NULL) {
2600                 kref_init(&rdata->refcount);
2601                 INIT_LIST_HEAD(&rdata->list);
2602                 init_completion(&rdata->done);
2603                 INIT_WORK(&rdata->work, complete);
2604         }
2605
2606         return rdata;
2607 }
2608
2609 void
2610 cifs_readdata_release(struct kref *refcount)
2611 {
2612         struct cifs_readdata *rdata = container_of(refcount,
2613                                         struct cifs_readdata, refcount);
2614
2615         if (rdata->cfile)
2616                 cifsFileInfo_put(rdata->cfile);
2617
2618         kfree(rdata);
2619 }
2620
2621 static int
2622 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2623 {
2624         int rc = 0;
2625         struct page *page;
2626         unsigned int i;
2627
2628         for (i = 0; i < nr_pages; i++) {
2629                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2630                 if (!page) {
2631                         rc = -ENOMEM;
2632                         break;
2633                 }
2634                 rdata->pages[i] = page;
2635         }
2636
2637         if (rc) {
2638                 for (i = 0; i < nr_pages; i++) {
2639                         put_page(rdata->pages[i]);
2640                         rdata->pages[i] = NULL;
2641                 }
2642         }
2643         return rc;
2644 }
2645
2646 static void
2647 cifs_uncached_readdata_release(struct kref *refcount)
2648 {
2649         struct cifs_readdata *rdata = container_of(refcount,
2650                                         struct cifs_readdata, refcount);
2651         unsigned int i;
2652
2653         for (i = 0; i < rdata->nr_pages; i++) {
2654                 put_page(rdata->pages[i]);
2655                 rdata->pages[i] = NULL;
2656         }
2657         cifs_readdata_release(refcount);
2658 }
2659
2660 static int
2661 cifs_retry_async_readv(struct cifs_readdata *rdata)
2662 {
2663         int rc;
2664         struct TCP_Server_Info *server;
2665
2666         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2667
2668         do {
2669                 if (rdata->cfile->invalidHandle) {
2670                         rc = cifs_reopen_file(rdata->cfile, true);
2671                         if (rc != 0)
2672                                 continue;
2673                 }
2674                 rc = server->ops->async_readv(rdata);
2675         } while (rc == -EAGAIN);
2676
2677         return rc;
2678 }
2679
2680 /**
2681  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2682  * @rdata:      the readdata response with list of pages holding data
2683  * @iov:        vector in which we should copy the data
2684  * @nr_segs:    number of segments in vector
2685  * @offset:     offset into file of the first iovec
2686  * @copied:     used to return the amount of data copied to the iov
2687  *
2688  * This function copies data from a list of pages in a readdata response into
2689  * an array of iovecs. It will first calculate where the data should go
2690  * based on the info in the readdata and then copy the data into that spot.
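      *
      * For example, if the overall read began at @offset 0 and this rdata
      * covers bytes 16384 through 32767 of the file, the iov_iter is first
      * advanced by 16384 (rdata->offset - @offset) before copying.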
2691  */
2692 static ssize_t
2693 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2694                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2695 {
2696         int rc = 0;
2697         struct iov_iter ii;
2698         size_t pos = rdata->offset - offset;
2699         ssize_t remaining = rdata->bytes;
2700         unsigned char *pdata;
2701         unsigned int i;
2702
2703         /* set up iov_iter and advance to the correct offset */
2704         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2705         iov_iter_advance(&ii, pos);
2706
2707         *copied = 0;
2708         for (i = 0; i < rdata->nr_pages; i++) {
2709                 ssize_t copy;
2710                 struct page *page = rdata->pages[i];
2711
2712                 /* copy a whole page or whatever's left */
2713                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2714
2715                 /* ...but limit it to whatever space is left in the iov */
2716                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2717
2718                 /* go while there's data to be copied and no errors */
2719                 if (copy && !rc) {
2720                         pdata = kmap(page);
2721                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2722                                                 (int)copy);
2723                         kunmap(page);
2724                         if (!rc) {
2725                                 *copied += copy;
2726                                 remaining -= copy;
2727                                 iov_iter_advance(&ii, copy);
2728                         }
2729                 }
2730         }
2731
2732         return rc;
2733 }
2734
2735 static void
2736 cifs_uncached_readv_complete(struct work_struct *work)
2737 {
2738         struct cifs_readdata *rdata = container_of(work,
2739                                                 struct cifs_readdata, work);
2740
2741         complete(&rdata->done);
2742         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2743 }
2744
2745 static int
2746 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2747                         struct cifs_readdata *rdata, unsigned int len)
2748 {
2749         int total_read = 0, result = 0;
2750         unsigned int i;
2751         unsigned int nr_pages = rdata->nr_pages;
2752         struct kvec iov;
2753
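             /* assume a full final page by default; rdata->tailsz is trimmed
                below when the last page containing data is only partial */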
2754         rdata->tailsz = PAGE_SIZE;
2755         for (i = 0; i < nr_pages; i++) {
2756                 struct page *page = rdata->pages[i];
2757
2758                 if (len >= PAGE_SIZE) {
2759                         /* enough data to fill the page */
2760                         iov.iov_base = kmap(page);
2761                         iov.iov_len = PAGE_SIZE;
2762                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2763                                 i, iov.iov_base, iov.iov_len);
2764                         len -= PAGE_SIZE;
2765                 } else if (len > 0) {
2766                         /* enough for partial page, fill and zero the rest */
2767                         iov.iov_base = kmap(page);
2768                         iov.iov_len = len;
2769                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2770                                 i, iov.iov_base, iov.iov_len);
2771                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2772                         rdata->tailsz = len;
2773                         len = 0;
2774                 } else {
2775                         /* no need to hold page hostage */
2776                         rdata->pages[i] = NULL;
2777                         rdata->nr_pages--;
2778                         put_page(page);
2779                         continue;
2780                 }
2781
2782                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2783                 kunmap(page);
2784                 if (result < 0)
2785                         break;
2786
2787                 total_read += result;
2788         }
2789
2790         return total_read > 0 ? total_read : result;
2791 }
2792
2793 static ssize_t
2794 cifs_iovec_read(struct file *file, const struct iovec *iov,
2795                  unsigned long nr_segs, loff_t *poffset)
2796 {
2797         ssize_t rc;
2798         size_t len, cur_len;
2799         ssize_t total_read = 0;
2800         loff_t offset = *poffset;
2801         unsigned int npages;
2802         struct cifs_sb_info *cifs_sb;
2803         struct cifs_tcon *tcon;
2804         struct cifsFileInfo *open_file;
2805         struct cifs_readdata *rdata, *tmp;
2806         struct list_head rdata_list;
2807         pid_t pid;
2808
2809         if (!nr_segs)
2810                 return 0;
2811
2812         len = iov_length(iov, nr_segs);
2813         if (!len)
2814                 return 0;
2815
2816         INIT_LIST_HEAD(&rdata_list);
2817         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2818         open_file = file->private_data;
2819         tcon = tlink_tcon(open_file->tlink);
2820
2821         if (!tcon->ses->server->ops->async_readv)
2822                 return -ENOSYS;
2823
2824         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2825                 pid = open_file->pid;
2826         else
2827                 pid = current->tgid;
2828
2829         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2830                 cFYI(1, "attempting read on write only file instance");
2831
2832         do {
2833                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2834                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2835
2836                 /* allocate a readdata struct */
2837                 rdata = cifs_readdata_alloc(npages,
2838                                             cifs_uncached_readv_complete);
                if (!rdata) {
                        /* rdata is NULL here, so the kref_put at the error
                           label below would oops; just stop submitting */
                        rc = -ENOMEM;
                        break;
                }
2843
2844                 rc = cifs_read_allocate_pages(rdata, npages);
2845                 if (rc)
2846                         goto error;
2847
2848                 rdata->cfile = cifsFileInfo_get(open_file);
2849                 rdata->nr_pages = npages;
2850                 rdata->offset = offset;
2851                 rdata->bytes = cur_len;
2852                 rdata->pid = pid;
2853                 rdata->pagesz = PAGE_SIZE;
2854                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2855
2856                 rc = cifs_retry_async_readv(rdata);
2857 error:
2858                 if (rc) {
2859                         kref_put(&rdata->refcount,
2860                                  cifs_uncached_readdata_release);
2861                         break;
2862                 }
2863
2864                 list_add_tail(&rdata->list, &rdata_list);
2865                 offset += cur_len;
2866                 len -= cur_len;
2867         } while (len > 0);
2868
        /* if at least one read request was sent successfully, reset rc */
2870         if (!list_empty(&rdata_list))
2871                 rc = 0;
2872
2873         /* the loop below should proceed in the order of increasing offsets */
2874 restart_loop:
2875         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2876                 if (!rc) {
2877                         ssize_t copied;
2878
2879                         /* FIXME: freezable sleep too? */
2880                         rc = wait_for_completion_killable(&rdata->done);
2881                         if (rc)
2882                                 rc = -EINTR;
2883                         else if (rdata->result)
2884                                 rc = rdata->result;
2885                         else {
2886                                 rc = cifs_readdata_to_iov(rdata, iov,
2887                                                         nr_segs, *poffset,
2888                                                         &copied);
2889                                 total_read += copied;
2890                         }
2891
2892                         /* resend call if it's a retryable error */
2893                         if (rc == -EAGAIN) {
2894                                 rc = cifs_retry_async_readv(rdata);
2895                                 goto restart_loop;
2896                         }
2897                 }
2898                 list_del_init(&rdata->list);
2899                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2900         }
2901
2902         cifs_stats_bytes_read(tcon, total_read);
2903         *poffset += total_read;
2904
2905         /* mask nodata case */
2906         if (rc == -ENODATA)
2907                 rc = 0;
2908
2909         return total_read ? total_read : rc;
2910 }
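
/*
 * A minimal sketch (hypothetical names, unused elsewhere in this file) of
 * the slicing done by cifs_iovec_read() above: the request is cut into
 * rsize-sized chunks, each of which becomes one async readdata request.
 * The reap phase then waits on the chunks in this same increasing-offset
 * order, resending any chunk that completes with -EAGAIN.
 */
struct sketch_chunk {
        loff_t offset;
        size_t bytes;
};

static inline unsigned int sketch_slice_request(loff_t offset, size_t len,
                                                size_t rsize,
                                                struct sketch_chunk *out,
                                                unsigned int max_chunks)
{
        unsigned int n = 0;

        while (len && n < max_chunks) {
                size_t cur_len = len < rsize ? len : rsize;

                out[n].offset = offset;
                out[n].bytes = cur_len;
                offset += cur_len;
                len -= cur_len;
                n++;
        }
        return n;
}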
2911
2912 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2913                                unsigned long nr_segs, loff_t pos)
2914 {
2915         ssize_t read;
2916
2917         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2918         if (read > 0)
2919                 iocb->ki_pos = pos;
2920
2921         return read;
2922 }
2923
2924 ssize_t
2925 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2926                   unsigned long nr_segs, loff_t pos)
2927 {
2928         struct inode *inode = file_inode(iocb->ki_filp);
2929         struct cifsInodeInfo *cinode = CIFS_I(inode);
2930         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2931         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2932                                                 iocb->ki_filp->private_data;
2933         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2934         int rc = -EACCES;
2935
        /*
         * In strict cache mode we need to read from the server every time if
         * we don't hold at least a level II oplock: the server can delay the
         * mtime change, so we cannot decide whether the inode needs to be
         * invalidated. Reading through the page cache can also fail if there
         * are mandatory locks on pages touched by this read that lie outside
         * the region from pos to pos+len-1.
         */
2944         if (!cinode->clientCanCacheRead)
2945                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2946
2947         if (cap_unix(tcon->ses) &&
2948             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2949             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2950                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2951
2952         /*
2953          * We need to hold the sem to be sure nobody modifies lock list
2954          * with a brlock that prevents reading.
2955          */
2956         down_read(&cinode->lock_sem);
2957         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2958                                      tcon->ses->server->vals->shared_lock_type,
2959                                      NULL, CIFS_READ_OP))
2960                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2961         up_read(&cinode->lock_sem);
2962         return rc;
2963 }
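
/*
 * Sketch of the locking gate used by cifs_strict_readv() above: the cached
 * read is attempted only while holding lock_sem for read and only if no
 * conflicting brlock covers the region. conflict() and cached_read() are
 * hypothetical stand-ins for cifs_find_lock_conflict() and
 * generic_file_aio_read().
 */
static inline ssize_t sketch_locked_read(struct rw_semaphore *sem,
                                         bool (*conflict)(loff_t, size_t),
                                         ssize_t (*cached_read)(void),
                                         loff_t pos, size_t count)
{
        ssize_t rc = -EACCES;

        down_read(sem);
        if (!conflict(pos, count))
                rc = cached_read();
        up_read(sem);
        return rc;
}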
2964
2965 static ssize_t
2966 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2967 {
2968         int rc = -EACCES;
2969         unsigned int bytes_read = 0;
2970         unsigned int total_read;
2971         unsigned int current_read_size;
2972         unsigned int rsize;
2973         struct cifs_sb_info *cifs_sb;
2974         struct cifs_tcon *tcon;
2975         struct TCP_Server_Info *server;
2976         unsigned int xid;
2977         char *cur_offset;
2978         struct cifsFileInfo *open_file;
2979         struct cifs_io_parms io_parms;
2980         int buf_type = CIFS_NO_BUFFER;
2981         __u32 pid;
2982
2983         xid = get_xid();
2984         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2985
2986         /* FIXME: set up handlers for larger reads and/or convert to async */
2987         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2988
2989         if (file->private_data == NULL) {
2990                 rc = -EBADF;
2991                 free_xid(xid);
2992                 return rc;
2993         }
2994         open_file = file->private_data;
2995         tcon = tlink_tcon(open_file->tlink);
2996         server = tcon->ses->server;
2997
2998         if (!server->ops->sync_read) {
2999                 free_xid(xid);
3000                 return -ENOSYS;
3001         }
3002
3003         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3004                 pid = open_file->pid;
3005         else
3006                 pid = current->tgid;
3007
3008         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3009                 cFYI(1, "attempting read on write only file instance");
3010
3011         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3012              total_read += bytes_read, cur_offset += bytes_read) {
3013                 current_read_size = min_t(uint, read_size - total_read, rsize);
                /*
                 * Windows ME and 9x refuse reads larger than the negotiated
                 * buffer size, so never request more than that.
                 */
3018                 if ((tcon->ses) && !(tcon->ses->capabilities &
3019                                 tcon->ses->server->vals->cap_large_files)) {
3020                         current_read_size = min_t(uint, current_read_size,
3021                                         CIFSMaxBufSize);
3022                 }
3023                 rc = -EAGAIN;
3024                 while (rc == -EAGAIN) {
3025                         if (open_file->invalidHandle) {
3026                                 rc = cifs_reopen_file(open_file, true);
3027                                 if (rc != 0)
3028                                         break;
3029                         }
3030                         io_parms.pid = pid;
3031                         io_parms.tcon = tcon;
3032                         io_parms.offset = *offset;
3033                         io_parms.length = current_read_size;
3034                         rc = server->ops->sync_read(xid, open_file, &io_parms,
3035                                                     &bytes_read, &cur_offset,
3036                                                     &buf_type);
3037                 }
3038                 if (rc || (bytes_read == 0)) {
3039                         if (total_read) {
3040                                 break;
3041                         } else {
3042                                 free_xid(xid);
3043                                 return rc;
3044                         }
3045                 } else {
3046                         cifs_stats_bytes_read(tcon, total_read);
3047                         *offset += bytes_read;
3048                 }
3049         }
3050         free_xid(xid);
3051         return total_read;
3052 }
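
/*
 * Sketch of the reopen-and-retry loop in cifs_read() above: a stale handle
 * makes the I/O callback fail with -EAGAIN, the handle is reopened, and the
 * same request is reissued. do_io() and reopen() are hypothetical; in the
 * real code it is the reconnect path that marks the handle invalid again,
 * which is what eventually lets the -EAGAIN loop make progress.
 */
static inline int sketch_retry_io(int (*do_io)(void *), int (*reopen)(void *),
                                  void *handle, bool *invalid)
{
        int rc = -EAGAIN;

        while (rc == -EAGAIN) {
                if (*invalid) {
                        rc = reopen(handle);
                        if (rc != 0)
                                break;  /* cannot recover the handle */
                        *invalid = false;
                }
                rc = do_io(handle);
        }
        return rc;
}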
3053
3054 /*
3055  * If the page is mmap'ed into a process' page tables, then we need to make
3056  * sure that it doesn't change while being written back.
3057  */
3058 static int
3059 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3060 {
3061         struct page *page = vmf->page;
3062
3063         lock_page(page);
3064         return VM_FAULT_LOCKED;
3065 }
3066
3067 static struct vm_operations_struct cifs_file_vm_ops = {
3068         .fault = filemap_fault,
3069         .page_mkwrite = cifs_page_mkwrite,
3070         .remap_pages = generic_file_remap_pages,
3071 };
3072
3073 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3074 {
3075         int rc, xid;
3076         struct inode *inode = file_inode(file);
3077
3078         xid = get_xid();
3079
3080         if (!CIFS_I(inode)->clientCanCacheRead) {
3081                 rc = cifs_invalidate_mapping(inode);
                if (rc) {
                        /* don't leak the xid on this early return */
                        free_xid(xid);
                        return rc;
                }
        }
3085
3086         rc = generic_file_mmap(file, vma);
3087         if (rc == 0)
3088                 vma->vm_ops = &cifs_file_vm_ops;
3089         free_xid(xid);
3090         return rc;
3091 }
3092
3093 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3094 {
3095         int rc, xid;
3096
3097         xid = get_xid();
3098         rc = cifs_revalidate_file(file);
3099         if (rc) {
3100                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3101                 free_xid(xid);
3102                 return rc;
3103         }
3104         rc = generic_file_mmap(file, vma);
3105         if (rc == 0)
3106                 vma->vm_ops = &cifs_file_vm_ops;
3107         free_xid(xid);
3108         return rc;
3109 }
3110
3111 static void
3112 cifs_readv_complete(struct work_struct *work)
3113 {
3114         unsigned int i;
3115         struct cifs_readdata *rdata = container_of(work,
3116                                                 struct cifs_readdata, work);
3117
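        /*
         * For each page: put it on the LRU, mark it up to date (on success)
         * before unlocking it, hand it to fscache only after it has been
         * unlocked, and drop the readahead page reference last.
         */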
3118         for (i = 0; i < rdata->nr_pages; i++) {
3119                 struct page *page = rdata->pages[i];
3120
3121                 lru_cache_add_file(page);
3122
3123                 if (rdata->result == 0) {
3124                         flush_dcache_page(page);
3125                         SetPageUptodate(page);
3126                 }
3127
3128                 unlock_page(page);
3129
3130                 if (rdata->result == 0)
3131                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3132
3133                 page_cache_release(page);
3134                 rdata->pages[i] = NULL;
3135         }
3136         kref_put(&rdata->refcount, cifs_readdata_release);
3137 }
3138
3139 static int
3140 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3141                         struct cifs_readdata *rdata, unsigned int len)
3142 {
3143         int total_read = 0, result = 0;
3144         unsigned int i;
3145         u64 eof;
3146         pgoff_t eof_index;
3147         unsigned int nr_pages = rdata->nr_pages;
3148         struct kvec iov;
3149
3150         /* determine the eof that the server (probably) has */
3151         eof = CIFS_I(rdata->mapping->host)->server_eof;
3152         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3153         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3154
3155         rdata->tailsz = PAGE_CACHE_SIZE;
3156         for (i = 0; i < nr_pages; i++) {
3157                 struct page *page = rdata->pages[i];
3158
3159                 if (len >= PAGE_CACHE_SIZE) {
3160                         /* enough data to fill the page */
3161                         iov.iov_base = kmap(page);
3162                         iov.iov_len = PAGE_CACHE_SIZE;
3163                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3164                                 i, page->index, iov.iov_base, iov.iov_len);
3165                         len -= PAGE_CACHE_SIZE;
3166                 } else if (len > 0) {
3167                         /* enough for partial page, fill and zero the rest */
3168                         iov.iov_base = kmap(page);
3169                         iov.iov_len = len;
3170                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3171                                 i, page->index, iov.iov_base, iov.iov_len);
3172                         memset(iov.iov_base + len,
3173                                 '\0', PAGE_CACHE_SIZE - len);
3174                         rdata->tailsz = len;
3175                         len = 0;
3176                 } else if (page->index > eof_index) {
3177                         /*
3178                          * The VFS will not try to do readahead past the
3179                          * i_size, but it's possible that we have outstanding
3180                          * writes with gaps in the middle and the i_size hasn't
3181                          * caught up yet. Populate those with zeroed out pages
3182                          * to prevent the VFS from repeatedly attempting to
3183                          * fill them until the writes are flushed.
3184                          */
3185                         zero_user(page, 0, PAGE_CACHE_SIZE);
3186                         lru_cache_add_file(page);
3187                         flush_dcache_page(page);
3188                         SetPageUptodate(page);
3189                         unlock_page(page);
3190                         page_cache_release(page);
3191                         rdata->pages[i] = NULL;
3192                         rdata->nr_pages--;
3193                         continue;
3194                 } else {
3195                         /* no need to hold page hostage */
3196                         lru_cache_add_file(page);
3197                         unlock_page(page);
3198                         page_cache_release(page);
3199                         rdata->pages[i] = NULL;
3200                         rdata->nr_pages--;
3201                         continue;
3202                 }
3203
3204                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3205                 kunmap(page);
3206                 if (result < 0)
3207                         break;
3208
3209                 total_read += result;
3210         }
3211
3212         return total_read > 0 ? total_read : result;
3213 }
3214
3215 static int cifs_readpages(struct file *file, struct address_space *mapping,
3216         struct list_head *page_list, unsigned num_pages)
3217 {
3218         int rc;
3219         struct list_head tmplist;
3220         struct cifsFileInfo *open_file = file->private_data;
3221         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3222         unsigned int rsize = cifs_sb->rsize;
3223         pid_t pid;
3224
        /*
         * Give up immediately if rsize is too small to read an entire page.
         * The VFS will fall back to readpage. We should never reach this
         * point, however, since we set ra_pages to 0 when rsize is smaller
         * than a cache page.
         */
3231         if (unlikely(rsize < PAGE_CACHE_SIZE))
3232                 return 0;
3233
        /*
         * Read as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative.
         */
3238         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3239                                          &num_pages);
3240         if (rc == 0)
3241                 return rc;
3242
3243         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3244                 pid = open_file->pid;
3245         else
3246                 pid = current->tgid;
3247
3248         rc = 0;
3249         INIT_LIST_HEAD(&tmplist);
3250
3251         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3252                 mapping, num_pages);
3253
3254         /*
3255          * Start with the page at end of list and move it to private
3256          * list. Do the same with any following pages until we hit
3257          * the rsize limit, hit an index discontinuity, or run out of
3258          * pages. Issue the async read and then start the loop again
3259          * until the list is empty.
3260          *
3261          * Note that list order is important. The page_list is in
3262          * the order of declining indexes. When we put the pages in
3263          * the rdata->pages, then we want them in increasing order.
3264          */
3265         while (!list_empty(page_list)) {
3266                 unsigned int i;
3267                 unsigned int bytes = PAGE_CACHE_SIZE;
3268                 unsigned int expected_index;
3269                 unsigned int nr_pages = 1;
3270                 loff_t offset;
3271                 struct page *page, *tpage;
3272                 struct cifs_readdata *rdata;
3273
3274                 page = list_entry(page_list->prev, struct page, lru);
3275
3276                 /*
3277                  * Lock the page and put it in the cache. Since no one else
3278                  * should have access to this page, we're safe to simply set
3279                  * PG_locked without checking it first.
3280                  */
3281                 __set_page_locked(page);
3282                 rc = add_to_page_cache_locked(page, mapping,
3283                                               page->index, GFP_KERNEL);
3284
3285                 /* give up if we can't stick it in the cache */
3286                 if (rc) {
3287                         __clear_page_locked(page);
3288                         break;
3289                 }
3290
3291                 /* move first page to the tmplist */
3292                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3293                 list_move_tail(&page->lru, &tmplist);
3294
3295                 /* now try and add more pages onto the request */
3296                 expected_index = page->index + 1;
3297                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3298                         /* discontinuity ? */
3299                         if (page->index != expected_index)
3300                                 break;
3301
3302                         /* would this page push the read over the rsize? */
3303                         if (bytes + PAGE_CACHE_SIZE > rsize)
3304                                 break;
3305
3306                         __set_page_locked(page);
3307                         if (add_to_page_cache_locked(page, mapping,
3308                                                 page->index, GFP_KERNEL)) {
3309                                 __clear_page_locked(page);
3310                                 break;
3311                         }
3312                         list_move_tail(&page->lru, &tmplist);
3313                         bytes += PAGE_CACHE_SIZE;
3314                         expected_index++;
3315                         nr_pages++;
3316                 }
3317
3318                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3319                 if (!rdata) {
3320                         /* best to give up if we're out of mem */
3321                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3322                                 list_del(&page->lru);
3323                                 lru_cache_add_file(page);
3324                                 unlock_page(page);
3325                                 page_cache_release(page);
3326                         }
3327                         rc = -ENOMEM;
3328                         break;
3329                 }
3330
3331                 rdata->cfile = cifsFileInfo_get(open_file);
3332                 rdata->mapping = mapping;
3333                 rdata->offset = offset;
3334                 rdata->bytes = bytes;
3335                 rdata->pid = pid;
3336                 rdata->pagesz = PAGE_CACHE_SIZE;
3337                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3338
3339                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3340                         list_del(&page->lru);
3341                         rdata->pages[rdata->nr_pages++] = page;
3342                 }
3343
3344                 rc = cifs_retry_async_readv(rdata);
3345                 if (rc != 0) {
3346                         for (i = 0; i < rdata->nr_pages; i++) {
3347                                 page = rdata->pages[i];
3348                                 lru_cache_add_file(page);
3349                                 unlock_page(page);
3350                                 page_cache_release(page);
3351                         }
3352                         kref_put(&rdata->refcount, cifs_readdata_release);
3353                         break;
3354                 }
3355
3356                 kref_put(&rdata->refcount, cifs_readdata_release);
3357         }
3358
3359         return rc;
3360 }
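
/*
 * Sketch of the batching rule in cifs_readpages() above: starting from one
 * page index, keep adding pages while they stay contiguous and the request
 * stays within the rsize budget. Purely illustrative (hypothetical name,
 * unused here); the real code also locks each page and moves it onto a
 * private list.
 */
static inline unsigned int sketch_batch_pages(const pgoff_t *index,
                                              unsigned int nr_indexes,
                                              unsigned int page_sz,
                                              unsigned int rsize)
{
        unsigned int n = 1;
        unsigned int bytes = page_sz;

        if (!nr_indexes)
                return 0;

        while (n < nr_indexes &&
               index[n] == index[n - 1] + 1 &&  /* still contiguous */
               bytes + page_sz <= rsize) {      /* still under rsize */
                bytes += page_sz;
                n++;
        }
        return n;
}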
3361
3362 static int cifs_readpage_worker(struct file *file, struct page *page,
3363         loff_t *poffset)
3364 {
3365         char *read_data;
3366         int rc;
3367
3368         /* Is the page cached? */
3369         rc = cifs_readpage_from_fscache(file_inode(file), page);
3370         if (rc == 0)
3371                 goto read_complete;
3372
3373         page_cache_get(page);
3374         read_data = kmap(page);
        /* for reads over a certain size we could initiate async readahead */
3376
3377         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3378
3379         if (rc < 0)
3380                 goto io_error;
3381         else
3382                 cFYI(1, "Bytes read %d", rc);
3383
3384         file_inode(file)->i_atime =
3385                 current_fs_time(file_inode(file)->i_sb);
3386
3387         if (PAGE_CACHE_SIZE > rc)
3388                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3389
3390         flush_dcache_page(page);
3391         SetPageUptodate(page);
3392
3393         /* send this page to the cache */
3394         cifs_readpage_to_fscache(file_inode(file), page);
3395
3396         rc = 0;
3397
3398 io_error:
3399         kunmap(page);
3400         page_cache_release(page);
3401
3402 read_complete:
3403         return rc;
3404 }
3405
3406 static int cifs_readpage(struct file *file, struct page *page)
3407 {
3408         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3409         int rc = -EACCES;
3410         unsigned int xid;
3411
3412         xid = get_xid();
3413
3414         if (file->private_data == NULL) {
3415                 rc = -EBADF;
3416                 free_xid(xid);
3417                 return rc;
3418         }
3419
        cFYI(1, "readpage %p at offset %lld 0x%llx",
                 page, (long long)offset, (unsigned long long)offset);
3422
3423         rc = cifs_readpage_worker(file, page, &offset);
3424
3425         unlock_page(page);
3426
3427         free_xid(xid);
3428         return rc;
3429 }
3430
3431 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3432 {
3433         struct cifsFileInfo *open_file;
3434
3435         spin_lock(&cifs_file_list_lock);
3436         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3437                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3438                         spin_unlock(&cifs_file_list_lock);
3439                         return 1;
3440                 }
3441         }
3442         spin_unlock(&cifs_file_list_lock);
3443         return 0;
3444 }
3445
/* We do not want to update the file size from the server for inodes
   open for write, to avoid races with writepage extending the file.
   In the future we could consider refreshing the inode only on
   increases in the file size, but that is tricky to do without racing
   with writebehind page caching in the current Linux kernel design. */
3452 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3453 {
3454         if (!cifsInode)
3455                 return true;
3456
3457         if (is_inode_writable(cifsInode)) {
3458                 /* This inode is open for write at least once */
3459                 struct cifs_sb_info *cifs_sb;
3460
3461                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3462                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /* with direct I/O there is no page cache to
                           corrupt, so the size can be changed safely */
3465                         return true;
3466                 }
3467
3468                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3469                         return true;
3470
3471                 return false;
3472         } else
3473                 return true;
3474 }
3475
3476 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3477                         loff_t pos, unsigned len, unsigned flags,
3478                         struct page **pagep, void **fsdata)
3479 {
3480         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3481         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3482         loff_t page_start = pos & PAGE_MASK;
3483         loff_t i_size;
3484         struct page *page;
3485         int rc = 0;
3486
3487         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3488
3489         page = grab_cache_page_write_begin(mapping, index, flags);
3490         if (!page) {
3491                 rc = -ENOMEM;
3492                 goto out;
3493         }
3494
3495         if (PageUptodate(page))
3496                 goto out;
3497
3498         /*
3499          * If we write a full page it will be up to date, no need to read from
3500          * the server. If the write is short, we'll end up doing a sync write
3501          * instead.
3502          */
3503         if (len == PAGE_CACHE_SIZE)
3504                 goto out;
3505
3506         /*
3507          * optimize away the read when we have an oplock, and we're not
3508          * expecting to use any of the data we'd be reading in. That
3509          * is, when the page lies beyond the EOF, or straddles the EOF
3510          * and the write will cover all of the existing data.
3511          */
3512         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3513                 i_size = i_size_read(mapping->host);
3514                 if (page_start >= i_size ||
3515                     (offset == 0 && (pos + len) >= i_size)) {
3516                         zero_user_segments(page, 0, offset,
3517                                            offset + len,
3518                                            PAGE_CACHE_SIZE);
3519                         /*
3520                          * PageChecked means that the parts of the page
3521                          * to which we're not writing are considered up
3522                          * to date. Once the data is copied to the
3523                          * page, it can be set uptodate.
3524                          */
3525                         SetPageChecked(page);
3526                         goto out;
3527                 }
3528         }
3529
3530         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3531                 /*
3532                  * might as well read a page, it is fast enough. If we get
3533                  * an error, we don't need to return it. cifs_write_end will
3534                  * do a sync write instead since PG_uptodate isn't set.
3535                  */
3536                 cifs_readpage_worker(file, page, &page_start);
3537         } else {
                /* We could try using another file handle if there is one,
                   but how would we lock it to prevent a close of that handle
                   from racing with this read? In any case the data will be
                   written out by write_end, so this is fine. */
3542         }
3543 out:
3544         *pagep = page;
3545         return rc;
3546 }
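
/*
 * Sketch of what the zero_user_segments() call in cifs_write_begin() above
 * does for the no-read fast path: everything in the page outside the region
 * about to be written, [offset, offset + len), is zero-filled, and the copy
 * itself then makes the page fully up to date. Hypothetical flat-buffer
 * version:
 */
static inline void sketch_zero_around_write(char *page, size_t page_sz,
                                            size_t offset, size_t len)
{
        memset(page, 0, offset);                /* head of the page */
        if (offset + len < page_sz)             /* tail of the page */
                memset(page + offset + len, 0, page_sz - (offset + len));
}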
3547
3548 static int cifs_release_page(struct page *page, gfp_t gfp)
3549 {
3550         if (PagePrivate(page))
3551                 return 0;
3552
3553         return cifs_fscache_release_page(page, gfp);
3554 }
3555
3556 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3557 {
3558         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3559
3560         if (offset == 0)
3561                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3562 }
3563
3564 static int cifs_launder_page(struct page *page)
3565 {
3566         int rc = 0;
3567         loff_t range_start = page_offset(page);
3568         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3569         struct writeback_control wbc = {
3570                 .sync_mode = WB_SYNC_ALL,
3571                 .nr_to_write = 0,
3572                 .range_start = range_start,
3573                 .range_end = range_end,
3574         };
3575
3576         cFYI(1, "Launder page: %p", page);
3577
3578         if (clear_page_dirty_for_io(page))
3579                 rc = cifs_writepage_locked(page, &wbc);
3580
3581         cifs_fscache_invalidate_page(page, page->mapping->host);
3582         return rc;
3583 }
3584
3585 void cifs_oplock_break(struct work_struct *work)
3586 {
3587         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3588                                                   oplock_break);
3589         struct inode *inode = cfile->dentry->d_inode;
3590         struct cifsInodeInfo *cinode = CIFS_I(inode);
3591         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3592         int rc = 0;
3593
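        /*
         * Order matters here: drop the cached read permission first if
         * mandatory locks demand it, flush (and, when read caching is lost,
         * invalidate) the page cache, push cached byte-range locks to the
         * server, and only then acknowledge the break.
         */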
3594         if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3595                                                 cifs_has_mand_locks(cinode)) {
3596                 cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
3597                      inode);
3598                 cinode->clientCanCacheRead = false;
3599         }
3600
3601         if (inode && S_ISREG(inode->i_mode)) {
3602                 if (cinode->clientCanCacheRead)
3603                         break_lease(inode, O_RDONLY);
3604                 else
3605                         break_lease(inode, O_WRONLY);
3606                 rc = filemap_fdatawrite(inode->i_mapping);
3607                 if (cinode->clientCanCacheRead == 0) {
3608                         rc = filemap_fdatawait(inode->i_mapping);
3609                         mapping_set_error(inode->i_mapping, rc);
3610                         cifs_invalidate_mapping(inode);
3611                 }
3612                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3613         }
3614
3615         rc = cifs_push_locks(cfile);
3616         if (rc)
3617                 cERROR(1, "Push locks rc = %d", rc);
3618
        /*
         * Releasing a stale oplock after a recent reconnect of the SMB
         * session, using a now-incorrect file handle, is not a data
         * integrity issue. Still, don't bother sending an oplock release if
         * the session to the server is still disconnected, since the server
         * has already released the oplock.
         */
3625         if (!cfile->oplock_break_cancelled) {
3626                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3627                                                              cinode);
3628                 cFYI(1, "Oplock release rc = %d", rc);
3629         }
3630 }
3631
3632 const struct address_space_operations cifs_addr_ops = {
3633         .readpage = cifs_readpage,
3634         .readpages = cifs_readpages,
3635         .writepage = cifs_writepage,
3636         .writepages = cifs_writepages,
3637         .write_begin = cifs_write_begin,
3638         .write_end = cifs_write_end,
3639         .set_page_dirty = __set_page_dirty_nobuffers,
3640         .releasepage = cifs_release_page,
3641         .invalidatepage = cifs_invalidate_page,
3642         .launder_page = cifs_launder_page,
3643 };
3644
3645 /*
3646  * cifs_readpages requires the server to support a buffer large enough to
3647  * contain the header plus one complete page of data.  Otherwise, we need
3648  * to leave cifs_readpages out of the address space operations.
3649  */
3650 const struct address_space_operations cifs_addr_ops_smallbuf = {
3651         .readpage = cifs_readpage,
3652         .writepage = cifs_writepage,
3653         .writepages = cifs_writepages,
3654         .write_begin = cifs_write_begin,
3655         .write_end = cifs_write_end,
3656         .set_page_dirty = __set_page_dirty_nobuffers,
3657         .releasepage = cifs_release_page,
3658         .invalidatepage = cifs_invalidate_page,
3659         .launder_page = cifs_launder_page,
3660 };