Merge tag 'mmc-fixes-for-3.7' of git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc
[cascardo/linux.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46 static inline int cifs_convert_flags(unsigned int flags)
47 {
48         if ((flags & O_ACCMODE) == O_RDONLY)
49                 return GENERIC_READ;
50         else if ((flags & O_ACCMODE) == O_WRONLY)
51                 return GENERIC_WRITE;
52         else if ((flags & O_ACCMODE) == O_RDWR) {
53                 /* GENERIC_ALL is too much permission to request
54                    can cause unnecessary access denied on create */
55                 /* return GENERIC_ALL; */
56                 return (GENERIC_READ | GENERIC_WRITE);
57         }
58
59         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61                 FILE_READ_DATA);
62 }
63
64 static u32 cifs_posix_convert_flags(unsigned int flags)
65 {
66         u32 posix_flags = 0;
67
68         if ((flags & O_ACCMODE) == O_RDONLY)
69                 posix_flags = SMB_O_RDONLY;
70         else if ((flags & O_ACCMODE) == O_WRONLY)
71                 posix_flags = SMB_O_WRONLY;
72         else if ((flags & O_ACCMODE) == O_RDWR)
73                 posix_flags = SMB_O_RDWR;
74
75         if (flags & O_CREAT)
76                 posix_flags |= SMB_O_CREAT;
77         if (flags & O_EXCL)
78                 posix_flags |= SMB_O_EXCL;
79         if (flags & O_TRUNC)
80                 posix_flags |= SMB_O_TRUNC;
81         /* be safe and imply O_SYNC for O_DSYNC */
82         if (flags & O_DSYNC)
83                 posix_flags |= SMB_O_SYNC;
84         if (flags & O_DIRECTORY)
85                 posix_flags |= SMB_O_DIRECTORY;
86         if (flags & O_NOFOLLOW)
87                 posix_flags |= SMB_O_NOFOLLOW;
88         if (flags & O_DIRECT)
89                 posix_flags |= SMB_O_DIRECT;
90
91         return posix_flags;
92 }
93
94 static inline int cifs_get_disposition(unsigned int flags)
95 {
96         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97                 return FILE_CREATE;
98         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99                 return FILE_OVERWRITE_IF;
100         else if ((flags & O_CREAT) == O_CREAT)
101                 return FILE_OPEN_IF;
102         else if ((flags & O_TRUNC) == O_TRUNC)
103                 return FILE_OVERWRITE;
104         else
105                 return FILE_OPEN;
106 }
107
/*
 * Open (or create) a file using the POSIX create call of the CIFS unix
 * extensions.
 *
 * On success the server file handle is returned through @pnetfid and the
 * granted oplock through @poplock.  If @pinode is non-NULL the inode is
 * instantiated (when *pinode == NULL) or refreshed from the returned
 * FILE_UNIX_BASIC_INFO — unless the server reported Type == -1, in which
 * case the caller is expected to do a qpathinfo itself.
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cFYI(1, "posix open %s", full_path);

	/* response buffer for the unix info returned by the create call */
	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the process umask before sending the mode to the server */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			/*
			 * NOTE(review): the handle opened above is not closed
			 * on this failure path — presumably the caller cleans
			 * up via *pnetfid; confirm against callers.
			 */
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
169
/*
 * Open a file using the standard NT/SMB path (no unix extensions).
 *
 * Converts the VFS open flags into an NT desired-access mask and create
 * disposition, issues the open through server->ops->open(), then
 * refreshes the inode metadata from the server.  The open handle is
 * returned through @fid and the granted oplock through @oplock.
 * Returns 0 on success or a negative errno.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;

	/* a protocol-specific open op is mandatory for this path */
	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* buffer to receive the file attributes returned by the open */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* mounts configured for backup credentials open with backup intent */
	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	rc = server->ops->open(xid, tcon, full_path, disposition,
			       desired_access, create_options, fid, oplock, buf,
			       cifs_sb);

	if (rc)
		goto out;

	/* refresh cached inode attributes using the data the open returned */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, &fid->netfid);

out:
	kfree(buf);
	return rc;
}
240
241 struct cifsFileInfo *
242 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
243                   struct tcon_link *tlink, __u32 oplock)
244 {
245         struct dentry *dentry = file->f_path.dentry;
246         struct inode *inode = dentry->d_inode;
247         struct cifsInodeInfo *cinode = CIFS_I(inode);
248         struct cifsFileInfo *cfile;
249         struct cifs_fid_locks *fdlocks;
250         struct cifs_tcon *tcon = tlink_tcon(tlink);
251
252         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
253         if (cfile == NULL)
254                 return cfile;
255
256         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
257         if (!fdlocks) {
258                 kfree(cfile);
259                 return NULL;
260         }
261
262         INIT_LIST_HEAD(&fdlocks->locks);
263         fdlocks->cfile = cfile;
264         cfile->llist = fdlocks;
265         down_write(&cinode->lock_sem);
266         list_add(&fdlocks->llist, &cinode->llist);
267         up_write(&cinode->lock_sem);
268
269         cfile->count = 1;
270         cfile->pid = current->tgid;
271         cfile->uid = current_fsuid();
272         cfile->dentry = dget(dentry);
273         cfile->f_flags = file->f_flags;
274         cfile->invalidHandle = false;
275         cfile->tlink = cifs_get_tlink(tlink);
276         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
277         mutex_init(&cfile->fh_mutex);
278
279         spin_lock(&cifs_file_list_lock);
280         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
281                 oplock = fid->pending_open->oplock;
282         list_del(&fid->pending_open->olist);
283
284         tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
285
286         list_add(&cfile->tlist, &tcon->openFileList);
287         /* if readable file instance put first in list*/
288         if (file->f_mode & FMODE_READ)
289                 list_add(&cfile->flist, &cinode->openFileList);
290         else
291                 list_add_tail(&cfile->flist, &cinode->openFileList);
292         spin_unlock(&cifs_file_list_lock);
293
294         file->private_data = cfile;
295         return cfile;
296 }
297
/*
 * Take an extra reference on the file private data; the matching
 * release is cifsFileInfo_put().  Must be called without holding
 * cifs_file_list_lock — it is acquired here to protect the refcount.
 * Returns @cifs_file for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}
306
307 /*
308  * Release a reference on the file private data. This may involve closing
309  * the filehandle out on the server. Must be called without holding
310  * cifs_file_list_lock.
311  */
312 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
313 {
314         struct inode *inode = cifs_file->dentry->d_inode;
315         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
316         struct TCP_Server_Info *server = tcon->ses->server;
317         struct cifsInodeInfo *cifsi = CIFS_I(inode);
318         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
319         struct cifsLockInfo *li, *tmp;
320         struct cifs_fid fid;
321         struct cifs_pending_open open;
322
323         spin_lock(&cifs_file_list_lock);
324         if (--cifs_file->count > 0) {
325                 spin_unlock(&cifs_file_list_lock);
326                 return;
327         }
328
329         if (server->ops->get_lease_key)
330                 server->ops->get_lease_key(inode, &fid);
331
332         /* store open in pending opens to make sure we don't miss lease break */
333         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
334
335         /* remove it from the lists */
336         list_del(&cifs_file->flist);
337         list_del(&cifs_file->tlist);
338
339         if (list_empty(&cifsi->openFileList)) {
340                 cFYI(1, "closing last open instance for inode %p",
341                         cifs_file->dentry->d_inode);
342                 /*
343                  * In strict cache mode we need invalidate mapping on the last
344                  * close  because it may cause a error when we open this file
345                  * again and get at least level II oplock.
346                  */
347                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
348                         CIFS_I(inode)->invalid_mapping = true;
349                 cifs_set_oplock_level(cifsi, 0);
350         }
351         spin_unlock(&cifs_file_list_lock);
352
353         cancel_work_sync(&cifs_file->oplock_break);
354
355         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
356                 struct TCP_Server_Info *server = tcon->ses->server;
357                 unsigned int xid;
358
359                 xid = get_xid();
360                 if (server->ops->close)
361                         server->ops->close(xid, tcon, &cifs_file->fid);
362                 _free_xid(xid);
363         }
364
365         cifs_del_pending_open(&open);
366
367         /*
368          * Delete any outstanding lock records. We'll lose them when the file
369          * is closed anyway.
370          */
371         down_write(&cifsi->lock_sem);
372         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
373                 list_del(&li->llist);
374                 cifs_del_lock_waiters(li);
375                 kfree(li);
376         }
377         list_del(&cifs_file->llist->llist);
378         kfree(cifs_file->llist);
379         up_write(&cifsi->lock_sem);
380
381         cifs_put_tlink(cifs_file->tlink);
382         dput(cifs_file->dentry);
383         kfree(cifs_file);
384 }
385
/*
 * ->open() for regular files.
 *
 * Tries a POSIX open first when the server advertises the unix
 * extensions path-ops capability; otherwise (or on certain errors)
 * falls back to the standard NT open via cifs_nt_open().  A pending
 * open is registered around the wire call so lease breaks that race
 * with the open are not lost.  On success file->private_data holds the
 * new cifsFileInfo.  Returns 0 or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
		 inode, file->f_flags, full_path);

	/* only request an oplock if the server supports them */
	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cFYI(1, "posix open succeeded");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server claims posix support but rejects the call:
			 * disable posix opens on this tcon from now on */
			if (tcon->ses->serverNOS)
				cERROR(1, "server %s of type %s returned"
					   " unexpected error on SMB posix open"
					   ", disabling posix open support."
					   " Check if server update available.",
					   tcon->ses->serverName,
					   tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* register the in-flight open so a racing lease break is not lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server-side open and the pending-open entry */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= NO_CHANGE_64,
			.gid	= NO_CHANGE_64,
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
507
/*
 * Try to reacquire byte range locks that were released when the
 * session to the server was lost.  Currently a stub that always
 * succeeds.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	/* BB list all locks open on this file and relock */
	return 0;
}
520
/*
 * Reopen a file whose server handle was invalidated (typically after a
 * reconnect).
 *
 * Serialized per-file via fh_mutex; returns immediately with 0 if some
 * other task already revalidated the handle.  Tries a POSIX reopen
 * first when the unix extensions allow it, otherwise falls back to the
 * protocol open op.  When @can_flush is set, dirty pages are written
 * back and inode metadata is refreshed from the server after the
 * reopen.  Returns 0 or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_fid fid;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	/* another task may have reopened the handle while we waited */
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = cfile->dentry->d_inode;
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
	     full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cFYI(1, "posix reopen succeeded");
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * CIFSSMBOpen and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, tcon, full_path, disposition,
			       desired_access, create_options, &fid, &oplock,
			       NULL, cifs_sb);
	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cFYI(1, "cifs_reopen returned 0x%x", rc);
		cFYI(1, "oplock: %d", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		/* push write-behind data before trusting server metadata */
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &fid, oplock);
	cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
653
654 int cifs_close(struct inode *inode, struct file *file)
655 {
656         if (file->private_data != NULL) {
657                 cifsFileInfo_put(file->private_data);
658                 file->private_data = NULL;
659         }
660
661         /* return code from the ->release op is always ignored */
662         return 0;
663 }
664
/*
 * ->release() for directories.
 *
 * If a readdir search is still in progress on the server, closes the
 * search handle (errors are logged and ignored), then frees any cached
 * network buffer from the search state and releases the private data.
 * Always returns 0 to the VFS unless the close itself fails before
 * being ignored.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cFYI(1, "Closedir inode = 0x%p", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cFYI(1, "Freeing private data in close dir");
	spin_lock(&cifs_file_list_lock);
	/* only close on the wire if the search handle is still live */
	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cFYI(1, "Closing uncompleted readdir with rc %d", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	/* release the SMB buffer still held by the search state, if any */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cFYI(1, "closedir free smb buf in srch struct");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
715
716 static struct cifsLockInfo *
717 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
718 {
719         struct cifsLockInfo *lock =
720                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
721         if (!lock)
722                 return lock;
723         lock->offset = offset;
724         lock->length = length;
725         lock->type = type;
726         lock->pid = current->tgid;
727         INIT_LIST_HEAD(&lock->blist);
728         init_waitqueue_head(&lock->block_q);
729         return lock;
730 }
731
732 void
733 cifs_del_lock_waiters(struct cifsLockInfo *lock)
734 {
735         struct cifsLockInfo *li, *tmp;
736         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
737                 list_del_init(&li->blist);
738                 wake_up(&li->block_q);
739         }
740 }
741
/*
 * Check one fid's lock list for a lock conflicting with the range
 * [offset, offset + length) of the given @type, requested on behalf of
 * @cfile.
 *
 * An existing lock is NOT a conflict when:
 *  - it does not overlap the requested range;
 *  - rw_check is set and it belongs to the same fid and thread group
 *    (reads/writes through one's own lock are allowed); or
 *  - the requested type is shared and the existing lock is either held
 *    by the same fid and thread group or is of the identical type.
 * On conflict, returns true and (when @conf_lock is non-NULL) reports
 * the offending lock through *conf_lock.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, bool rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* no overlap with this lock -> keep scanning */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/* our own lock never blocks our own read/write check */
		if (rw_check && server->ops->compare_fids(cfile, cur_cfile) &&
		    current->tgid == li->pid)
			continue;
		/* shared request: compatible with our own or same-type locks */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
768
769 bool
770 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
771                         __u8 type, struct cifsLockInfo **conf_lock,
772                         bool rw_check)
773 {
774         bool rc = false;
775         struct cifs_fid_locks *cur;
776         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
777
778         list_for_each_entry(cur, &cinode->llist, llist) {
779                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
780                                                  cfile, conf_lock, rw_check);
781                 if (rc)
782                         break;
783         }
784
785         return rc;
786 }
787
/*
 * Check if there is another lock that prevents us to set the lock (mandatory
 * style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	/* read-lock the brlock lists while we query them */
	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					&conf_lock, false);
	if (exist) {
		/* report the conflicting lock's range, owner and type */
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		/* locks are not cached locally: the server must be asked */
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
825
826 static void
827 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
828 {
829         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
830         down_write(&cinode->lock_sem);
831         list_add_tail(&lock->llist, &cfile->llist->locks);
832         up_write(&cinode->lock_sem);
833 }
834
835 /*
836  * Set the byte-range lock (mandatory style). Returns:
837  * 1) 0, if we set the lock and don't need to request to the server;
838  * 2) 1, if no locks prevent us but we need to request to the server;
839  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
840  */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, false);
	if (!exist && cinode->can_cache_brlcks) {
		/* no conflict and caching is on - keep the lock local only */
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		/* no conflict but caching is off - must ask the server */
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/*
		 * Queue ourselves on the conflicting lock's blocked list and
		 * sleep until we are unlinked from it (checked via the
		 * empty-list condition below) when that lock goes away.
		 */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* interrupted by a signal - unlink from the blocked list */
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
881
882 /*
883  * Check if there is another lock that prevents us to set the lock (posix
884  * style). If such a lock exists, update the flock structure with its
885  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
886  * or leave it the same if we can't. Returns 0 if we don't need to request to
887  * the server or 1 otherwise.
888  */
889 static int
890 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
891 {
892         int rc = 0;
893         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
894         unsigned char saved_type = flock->fl_type;
895
896         if ((flock->fl_flags & FL_POSIX) == 0)
897                 return 1;
898
899         down_read(&cinode->lock_sem);
900         posix_test_lock(file, flock);
901
902         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
903                 flock->fl_type = saved_type;
904                 rc = 1;
905         }
906
907         up_read(&cinode->lock_sem);
908         return rc;
909 }
910
911 /*
912  * Set the byte-range lock (posix style). Returns:
913  * 1) 0, if we set the lock and don't need to request to the server;
914  * 2) 1, if we need to request to the server;
915  * 3) <0, if the error occurs while setting the lock.
916  */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
	int rc = 1;

	/* non-posix locks always go to the server */
	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* caching is off - the caller must send it to the server */
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		/*
		 * Blocked by another lock: sleep until the blocker is gone
		 * and retry; if interrupted by a signal, remove ourselves
		 * from the blocked-locks bookkeeping before returning.
		 */
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		locks_delete_block(flock);
	}
	return rc;
}
943
/*
 * Push all locally cached mandatory brlocks of the file to the server in
 * LOCKING_ANDX batches and turn lock caching off for the inode. Returns 0
 * on success or the last error returned by the server.
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	/* exclusive lock ranges are sent first, then shared ones */
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* someone already pushed the locks - nothing to do */
		up_write(&cinode->lock_sem);
		free_xid(xid);
		return rc;
	}

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf) {
		up_write(&cinode->lock_sem);
		free_xid(xid);
		return -EINVAL;
	}

	/* how many lock ranges fit into one LOCKING_ANDX request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		up_write(&cinode->lock_sem);
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			/* only locks of the current type go into this batch */
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* buffer full - flush this batch now */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			/* send the remaining, partially filled batch */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);

	kfree(buf);
	free_xid(xid);
	return rc;
}
1028
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
	for (lockp = &inode->i_flock; *lockp != NULL; \
	     lockp = &(*lockp)->fl_next)

/*
 * Snapshot of a posix lock taken under the flocks spinlock so it can be
 * sent to the server after the spinlock is dropped.
 */
struct lock_to_push {
	struct list_head llist;	/* entry in the locks_to_send list */
	__u64 offset;		/* start of the locked byte range */
	__u64 length;		/* length of the locked byte range */
	__u32 pid;		/* pid of the lock owner */
	__u16 netfid;		/* file handle the lock belongs to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1042
/*
 * Push all locally cached posix locks of the file to the server and turn
 * lock caching off for the inode. Lock structures are preallocated because
 * no memory may be allocated while holding the flocks spinlock.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock, **before;
	unsigned int count = 0, i = 0;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* someone already pushed the locks - nothing to do */
		up_write(&cinode->lock_sem);
		free_xid(xid);
		return rc;
	}

	/* first pass: count posix locks so we can preallocate outside the
	   flocks spinlock */
	lock_flocks();
	cifs_for_each_lock(cfile->dentry->d_inode, before) {
		if ((*before)->fl_flags & FL_POSIX)
			count++;
	}
	unlock_flocks();

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	/* second pass: copy each posix lock into a preallocated entry */
	el = locks_to_send.next;
	lock_flocks();
	cifs_for_each_lock(cfile->dentry->d_inode, before) {
		flock = *before;
		if ((flock->fl_flags & FL_POSIX) == 0)
			continue;
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cERROR(1, "Can't push all brlocks!");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = flock->fl_pid;
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	unlock_flocks();

	/* now send the snapshots to the server, spinlock no longer held */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);

	free_xid(xid);
	return rc;
err_out:
	/* allocation failed part-way - release what we got so far */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
1142
1143 static int
1144 cifs_push_locks(struct cifsFileInfo *cfile)
1145 {
1146         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1147         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1148
1149         if (cap_unix(tcon->ses) &&
1150             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1151             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1152                 return cifs_push_posix_locks(cfile);
1153
1154         return tcon->ses->server->ops->push_mand_locks(cfile);
1155 }
1156
1157 static void
1158 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1159                 bool *wait_flag, struct TCP_Server_Info *server)
1160 {
1161         if (flock->fl_flags & FL_POSIX)
1162                 cFYI(1, "Posix");
1163         if (flock->fl_flags & FL_FLOCK)
1164                 cFYI(1, "Flock");
1165         if (flock->fl_flags & FL_SLEEP) {
1166                 cFYI(1, "Blocking lock");
1167                 *wait_flag = true;
1168         }
1169         if (flock->fl_flags & FL_ACCESS)
1170                 cFYI(1, "Process suspended by mandatory locking - "
1171                         "not implemented yet");
1172         if (flock->fl_flags & FL_LEASE)
1173                 cFYI(1, "Lease on file - not implemented yet");
1174         if (flock->fl_flags &
1175             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1176                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1177                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1178
1179         *type = server->vals->large_lock_type;
1180         if (flock->fl_type == F_WRLCK) {
1181                 cFYI(1, "F_WRLCK ");
1182                 *type |= server->vals->exclusive_lock_type;
1183                 *lock = 1;
1184         } else if (flock->fl_type == F_UNLCK) {
1185                 cFYI(1, "F_UNLCK");
1186                 *type |= server->vals->unlock_lock_type;
1187                 *unlock = 1;
1188                 /* Check if unlock includes more than one lock range */
1189         } else if (flock->fl_type == F_RDLCK) {
1190                 cFYI(1, "F_RDLCK");
1191                 *type |= server->vals->shared_lock_type;
1192                 *lock = 1;
1193         } else if (flock->fl_type == F_EXLCK) {
1194                 cFYI(1, "F_EXLCK");
1195                 *type |= server->vals->exclusive_lock_type;
1196                 *lock = 1;
1197         } else if (flock->fl_type == F_SHLCK) {
1198                 cFYI(1, "F_SHLCK");
1199                 *type |= server->vals->shared_lock_type;
1200                 *lock = 1;
1201         } else
1202                 cFYI(1, "Unknown type of lock");
1203 }
1204
/*
 * Handle F_GETLK: determine whether the requested range is locked, first
 * locally and then, if necessary, by probing the server with a temporary
 * lock/unlock pair. Updates flock with the answer and returns 0 on success.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0 means we answered from the local lock cache */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	/* rc == 0 means we answered from the local lock cache */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	/* probe: try to take the lock on the server and release it again */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cERROR(1, "Error unlocking previously locked "
				  "range %d during test of lock", rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		/* even a shared lock failed - range is exclusively locked */
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed - retry with a shared lock to distinguish
	   a read lock from a write lock on the range */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cERROR(1, "Error unlocking previously locked "
				  "range %d during test of lock", rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1272
1273 void
1274 cifs_move_llist(struct list_head *source, struct list_head *dest)
1275 {
1276         struct list_head *li, *tmp;
1277         list_for_each_safe(li, tmp, source)
1278                 list_move(li, dest);
1279 }
1280
1281 void
1282 cifs_free_llist(struct list_head *llist)
1283 {
1284         struct cifsLockInfo *li, *tmp;
1285         list_for_each_entry_safe(li, tmp, llist, llist) {
1286                 cifs_del_lock_waiters(li);
1287                 list_del(&li->llist);
1288                 kfree(li);
1289         }
1290 }
1291
/*
 * Remove all cached locks of the calling process that fall inside the
 * unlock range and, unless brlocks are cached, send batched LOCKING_ANDX
 * unlock requests to the server. Locks are parked on a temporary list so
 * they can be restored if the server rejects the unlock.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	/* exclusive lock ranges are processed first, then shared ones */
	int types[] = {LOCKING_ANDX_LARGE_FILES,
		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it for zero before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (!max_buf)
		return -EINVAL;

	/* how many lock ranges fit into one LOCKING_ANDX request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			/* skip locks not fully contained in the unlock range */
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			/* only unlock this process's locks of this type */
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				/* buffer full - flush this unlock batch now */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeed -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			/* send the remaining, partially filled batch */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
1398
/*
 * Handle F_SETLK/F_SETLKW: set or clear a byte-range lock, posix or
 * mandatory style. Mandatory locks are recorded locally first (or queued
 * until a conflicting lock goes away if @wait_flag) and then sent to the
 * server when caching is off.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0: satisfied from the local cache; rc < 0: error */
		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      current->tgid, flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		/*
		 * rc == 0: lock was cached locally (ownership passed to the
		 * list); rc == 1: must ask the server; rc < 0: error - we
		 * still own the lock and must free it.
		 */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0)
			kfree(lock);
		if (rc <= 0)
			goto out;

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			/* server refused - don't keep a stale local entry */
			kfree(lock);
			goto out;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX)
		posix_lock_file_wait(file, flock);
	return rc;
}
1460
1461 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1462 {
1463         int rc, xid;
1464         int lock = 0, unlock = 0;
1465         bool wait_flag = false;
1466         bool posix_lck = false;
1467         struct cifs_sb_info *cifs_sb;
1468         struct cifs_tcon *tcon;
1469         struct cifsInodeInfo *cinode;
1470         struct cifsFileInfo *cfile;
1471         __u16 netfid;
1472         __u32 type;
1473
1474         rc = -EACCES;
1475         xid = get_xid();
1476
1477         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1478                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1479                 flock->fl_start, flock->fl_end);
1480
1481         cfile = (struct cifsFileInfo *)file->private_data;
1482         tcon = tlink_tcon(cfile->tlink);
1483
1484         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1485                         tcon->ses->server);
1486
1487         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1488         netfid = cfile->fid.netfid;
1489         cinode = CIFS_I(file->f_path.dentry->d_inode);
1490
1491         if (cap_unix(tcon->ses) &&
1492             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1493             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1494                 posix_lck = true;
1495         /*
1496          * BB add code here to normalize offset and length to account for
1497          * negative length which we can not accept over the wire.
1498          */
1499         if (IS_GETLK(cmd)) {
1500                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1501                 free_xid(xid);
1502                 return rc;
1503         }
1504
1505         if (!lock && !unlock) {
1506                 /*
1507                  * if no lock or unlock then nothing to do since we do not
1508                  * know what it is
1509                  */
1510                 free_xid(xid);
1511                 return -EOPNOTSUPP;
1512         }
1513
1514         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1515                         xid);
1516         free_xid(xid);
1517         return rc;
1518 }
1519
1520 /*
1521  * update the file size (if needed) after a write. Should be called with
1522  * the inode->i_lock held
1523  */
1524 void
1525 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1526                       unsigned int bytes_written)
1527 {
1528         loff_t end_of_write = offset + bytes_written;
1529
1530         if (end_of_write > cifsi->server_eof)
1531                 cifsi->server_eof = end_of_write;
1532 }
1533
/*
 * Synchronously write @write_size bytes from @write_data to the file at
 * *@offset on behalf of @pid, retrying on -EAGAIN (reopening a stale
 * handle if needed) and chunking by the mount's wsize. Advances *@offset
 * and updates the cached EOF/i_size. Returns the number of bytes written,
 * or a negative error if nothing was written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
	     *offset, dentry->d_name.name);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* write at most one wsize-sized chunk per request */
			len = min((size_t)cifs_sb->wsize,
				  write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, open_file, &io_parms,
						     &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* report a short write if we made any progress,
			   otherwise propagate the error */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&dentry->d_inode->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&dentry->d_inode->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend the cached file size if the write went past it */
		spin_lock(&dentry->d_inode->i_lock);
		if (*offset > dentry->d_inode->i_size)
			i_size_write(dentry->d_inode, *offset);
		spin_unlock(&dentry->d_inode->i_lock);
	}
	mark_inode_dirty_sync(dentry->d_inode);
	free_xid(xid);
	return total_written;
}
1618
/*
 * Find an already-open handle on this inode that can be used for reading.
 *
 * Walks the inode's open-file list under cifs_file_list_lock and returns
 * the first handle opened with FMODE_READ whose server handle is still
 * valid, with a reference taken (caller must cifsFileInfo_put it).  On
 * multiuser mounts, @fsuid_only restricts the search to handles owned by
 * the caller's fsuid.  Returns NULL if no suitable handle exists.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && open_file->uid != current_fsuid())
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}
1652
1653 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1654                                         bool fsuid_only)
1655 {
1656         struct cifsFileInfo *open_file, *inv_file = NULL;
1657         struct cifs_sb_info *cifs_sb;
1658         bool any_available = false;
1659         int rc;
1660         unsigned int refind = 0;
1661
1662         /* Having a null inode here (because mapping->host was set to zero by
1663         the VFS or MM) should not happen but we had reports of on oops (due to
1664         it being zero) during stress testcases so we need to check for it */
1665
1666         if (cifs_inode == NULL) {
1667                 cERROR(1, "Null inode passed to cifs_writeable_file");
1668                 dump_stack();
1669                 return NULL;
1670         }
1671
1672         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1673
1674         /* only filter by fsuid on multiuser mounts */
1675         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1676                 fsuid_only = false;
1677
1678         spin_lock(&cifs_file_list_lock);
1679 refind_writable:
1680         if (refind > MAX_REOPEN_ATT) {
1681                 spin_unlock(&cifs_file_list_lock);
1682                 return NULL;
1683         }
1684         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1685                 if (!any_available && open_file->pid != current->tgid)
1686                         continue;
1687                 if (fsuid_only && open_file->uid != current_fsuid())
1688                         continue;
1689                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1690                         if (!open_file->invalidHandle) {
1691                                 /* found a good writable file */
1692                                 cifsFileInfo_get_locked(open_file);
1693                                 spin_unlock(&cifs_file_list_lock);
1694                                 return open_file;
1695                         } else {
1696                                 if (!inv_file)
1697                                         inv_file = open_file;
1698                         }
1699                 }
1700         }
1701         /* couldn't find useable FH with same pid, try any available */
1702         if (!any_available) {
1703                 any_available = true;
1704                 goto refind_writable;
1705         }
1706
1707         if (inv_file) {
1708                 any_available = false;
1709                 cifsFileInfo_get_locked(inv_file);
1710         }
1711
1712         spin_unlock(&cifs_file_list_lock);
1713
1714         if (inv_file) {
1715                 rc = cifs_reopen_file(inv_file, false);
1716                 if (!rc)
1717                         return inv_file;
1718                 else {
1719                         spin_lock(&cifs_file_list_lock);
1720                         list_move_tail(&inv_file->flist,
1721                                         &cifs_inode->openFileList);
1722                         spin_unlock(&cifs_file_list_lock);
1723                         cifsFileInfo_put(inv_file);
1724                         spin_lock(&cifs_file_list_lock);
1725                         ++refind;
1726                         goto refind_writable;
1727                 }
1728         }
1729
1730         return NULL;
1731 }
1732
/*
 * Write the byte range [from, to) of the given (kmapped) page cache page
 * back to the server using any writable handle we can find on the inode.
 *
 * Returns 0 on success (or when the page lies past i_size, i.e. we raced
 * with truncate), a negative errno on failure, and -EIO when no writable
 * handle is available.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	/* sanity check the requested range before using it */
	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cFYI(1, "No writeable filehandles for inode");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
1786
/*
 * ->writepages handler: gather runs of contiguous dirty pages and send
 * them to the server as large async writes (up to wsize bytes each).
 *
 * Falls back to generic_writepages() (one page at a time) when wsize is
 * smaller than the page cache size.  Returns 0 on success or the first
 * fatal error encountered.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	struct TCP_Server_Info *server;
	struct page *page;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages;
		pgoff_t next = 0, tofind;
		struct page **pages;

		/* cap each batch at one wsize worth of pages */
		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
				end - index) + 1;

		wdata = cifs_writedata_alloc((unsigned int)tofind,
					     cifs_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		/*
		 * find_get_pages_tag seems to return a max of 256 on each
		 * iteration, so we must call it several times in order to
		 * fill the array or the wsize is effectively limited to
		 * 256 * PAGE_CACHE_SIZE.
		 */
		found_pages = 0;
		pages = wdata->pages;
		do {
			nr_pages = find_get_pages_tag(mapping, &index,
							PAGECACHE_TAG_DIRTY,
							tofind, pages);
			found_pages += nr_pages;
			tofind -= nr_pages;
			pages += nr_pages;
		} while (nr_pages && tofind && index <= end);

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		/* lock a contiguous run of dirty pages for this request */
		nr_pages = 0;
		for (i = 0; i < found_pages; i++) {
			page = wdata->pages[i];
			/*
			 * At this point we hold neither mapping->tree_lock nor
			 * lock on the page itself: the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or even
			 * swizzled back from swapper_space to tmpfs file
			 * mapping
			 */

			if (nr_pages == 0)
				lock_page(page);
			else if (!trylock_page(page))
				break;

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				break;
			}

			if (!wbc->range_cyclic && page->index > end) {
				done = true;
				unlock_page(page);
				break;
			}

			if (next && (page->index != next)) {
				/* Not next consecutive page */
				unlock_page(page);
				break;
			}

			if (wbc->sync_mode != WB_SYNC_NONE)
				wait_on_page_writeback(page);

			if (PageWriteback(page) ||
					!clear_page_dirty_for_io(page)) {
				unlock_page(page);
				break;
			}

			/*
			 * This actually clears the dirty bit in the radix tree.
			 * See cifs_writepage() for more commentary.
			 */
			set_page_writeback(page);

			if (page_offset(page) >= i_size_read(mapping->host)) {
				done = true;
				unlock_page(page);
				end_page_writeback(page);
				break;
			}

			wdata->pages[i] = page;
			next = page->index + 1;
			++nr_pages;
		}

		/* reset index to refind any pages skipped */
		if (nr_pages == 0)
			index = wdata->pages[0]->index + 1;

		/* put any pages we aren't going to use */
		for (i = nr_pages; i < found_pages; i++) {
			page_cache_release(wdata->pages[i]);
			wdata->pages[i] = NULL;
		}

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			continue;
		}

		/* last page may be partial: size it against i_size */
		wdata->sync_mode = wbc->sync_mode;
		wdata->nr_pages = nr_pages;
		wdata->offset = page_offset(wdata->pages[0]);
		wdata->pagesz = PAGE_CACHE_SIZE;
		wdata->tailsz =
			min(i_size_read(mapping->host) -
			    page_offset(wdata->pages[nr_pages - 1]),
			    (loff_t)PAGE_CACHE_SIZE);
		wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
					wdata->tailsz;

		/* refresh the writable handle and (re)send on -EAGAIN when
		   doing data-integrity writeback */
		do {
			if (wdata->cfile != NULL)
				cifsFileInfo_put(wdata->cfile);
			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
							  false);
			if (!wdata->cfile) {
				cERROR(1, "No writable handles for inode");
				rc = -EBADF;
				break;
			}
			wdata->pid = wdata->cfile->pid;
			server = tlink_tcon(wdata->cfile->tlink)->ses->server;
			rc = server->ops->async_writev(wdata);
		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

		for (i = 0; i < nr_pages; ++i)
			unlock_page(wdata->pages[i]);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
1996
/*
 * Write back a single locked page.  The caller holds the page lock; this
 * function takes an extra page reference, marks the page under writeback,
 * writes it synchronously via cifs_partialpagewrite(), and clears the
 * writeback state.  For WB_SYNC_ALL, -EAGAIN from the server is retried
 * until it resolves; otherwise the page is redirtied.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cFYI(1, "ppw - page not up to date");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	free_xid(xid);
	return rc;
}
2035
/*
 * ->writepage handler: write the page (locked by the caller) and drop
 * the page lock before returning.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}
2042
/*
 * ->write_end handler: commit @copied bytes written into @page at @pos.
 *
 * If the page is uptodate the data is simply left dirty in the page cache;
 * otherwise (a partial write to a non-uptodate page) the data is written
 * synchronously to the server via cifs_write() using this file's handle.
 * Extends i_size when the write went past EOF.  Returns the number of
 * bytes committed, or a negative errno from the synchronous write path.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cFYI(1, "write_end for page %p from pos %lld with %d bytes",
		 page, pos, copied);

	/* PageChecked is set by cifs_write_begin when it skipped reading
	   the page; a full copy makes it uptodate now */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}
2103
/*
 * fsync for "strict" cache mode: flush dirty pages in [start, end], then
 * additionally invalidate the page cache when we no longer hold a read
 * oplock (the cache may be stale), and finally ask the server to flush
 * the file handle unless nostrictsync is set.
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cFYI(1, "Sync file - name: %s datasync: 0x%x",
		file->f_path.dentry->d_name.name, datasync);

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc) {
			cFYI(1, "rc: %d during invalidate phase", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
2146
/*
 * Default fsync: flush dirty pages in [start, end] to the server, then
 * issue a server-side flush of the file handle unless the nostrictsync
 * mount option is set.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	struct inode *inode = file->f_mapping->host;

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cFYI(1, "Sync file - name: %s datasync: 0x%x",
		file->f_path.dentry->d_name.name, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
2180
2181 /*
2182  * As file closes, flush all cached write data for this inode checking
2183  * for write behind errors.
2184  */
2185 int cifs_flush(struct file *file, fl_owner_t id)
2186 {
2187         struct inode *inode = file->f_path.dentry->d_inode;
2188         int rc = 0;
2189
2190         if (file->f_mode & FMODE_WRITE)
2191                 rc = filemap_write_and_wait(inode->i_mapping);
2192
2193         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2194
2195         return rc;
2196 }
2197
2198 static int
2199 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2200 {
2201         int rc = 0;
2202         unsigned long i;
2203
2204         for (i = 0; i < num_pages; i++) {
2205                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2206                 if (!pages[i]) {
2207                         /*
2208                          * save number of pages we have already allocated and
2209                          * return with ENOMEM error
2210                          */
2211                         num_pages = i;
2212                         rc = -ENOMEM;
2213                         break;
2214                 }
2215         }
2216
2217         if (rc) {
2218                 for (i = 0; i < num_pages; i++)
2219                         put_page(pages[i]);
2220         }
2221         return rc;
2222 }
2223
2224 static inline
2225 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2226 {
2227         size_t num_pages;
2228         size_t clen;
2229
2230         clen = min_t(const size_t, len, wsize);
2231         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2232
2233         if (cur_len)
2234                 *cur_len = clen;
2235
2236         return num_pages;
2237 }
2238
/*
 * Completion work for an uncached (direct-style) async write: advance the
 * cached server EOF and i_size if this write extended the file, wake any
 * waiter on wdata->done, release the data pages (unless the write will be
 * retried with -EAGAIN, in which case the pages are still needed), and
 * drop the work item's reference on wdata.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	int i;
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = wdata->cfile->dentry->d_inode;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);

	if (wdata->result != -EAGAIN) {
		for (i = 0; i < wdata->nr_pages; i++)
			put_page(wdata->pages[i]);
	}

	kref_put(&wdata->refcount, cifs_writedata_release);
}
2263
2264 /* attempt to send write to server, retry on any -EAGAIN errors */
2265 static int
2266 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2267 {
2268         int rc;
2269         struct TCP_Server_Info *server;
2270
2271         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2272
2273         do {
2274                 if (wdata->cfile->invalidHandle) {
2275                         rc = cifs_reopen_file(wdata->cfile, false);
2276                         if (rc != 0)
2277                                 continue;
2278                 }
2279                 rc = server->ops->async_writev(wdata);
2280         } while (rc == -EAGAIN);
2281
2282         return rc;
2283 }
2284
/*
 * Uncached vectored write: copy the user iovec into kernel pages in
 * wsize-sized chunks, fire off an async write for each chunk, then wait
 * for the replies in offset order and total up the bytes written.
 *
 * If at least one send succeeded, errors from later sends are dropped in
 * favor of reporting a short write.  -EAGAIN replies are resent.  On
 * success *poffset is advanced and the byte count is returned; otherwise
 * a negative errno.
 */
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
		 unsigned long nr_segs, loff_t *poffset)
{
	unsigned long nr_pages, i;
	size_t copied, len, cur_len;
	ssize_t total_written = 0;
	loff_t offset;
	struct iov_iter it;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_writedata *wdata, *tmp;
	struct list_head wdata_list;
	int rc;
	pid_t pid;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	rc = generic_write_checks(file, poffset, &len, 0);
	if (rc)
		return rc;

	INIT_LIST_HEAD(&wdata_list);
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	offset = *poffset;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* chunk the iovec into wsize-sized async write requests */
	iov_iter_init(&it, iov, nr_segs, len, 0);
	do {
		size_t save_len;

		nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			break;
		}

		/* copy from userspace; cur_len ends up as the number of
		   bytes actually copied (may be short on a fault) */
		save_len = cur_len;
		for (i = 0; i < nr_pages; i++) {
			copied = min_t(const size_t, cur_len, PAGE_SIZE);
			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
							 0, copied);
			cur_len -= copied;
			iov_iter_advance(&it, copied);
		}
		cur_len = save_len - cur_len;

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		rc = cifs_uncached_retry_writev(wdata);
		if (rc) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			break;
		}

		list_add_tail(&wdata->list, &wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&wdata_list))
		rc = 0;

	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit or we get a fatal signal
	 * while waiting, then return without waiting for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
		if (!rc) {
			/* FIXME: freezable too? */
			rc = wait_for_completion_killable(&wdata->done);
			if (rc)
				rc = -EINTR;
			else if (wdata->result)
				rc = wdata->result;
			else
				total_written += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				rc = cifs_uncached_retry_writev(wdata);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_writedata_release);
	}

	if (total_written > 0)
		*poffset += total_written;

	cifs_stats_bytes_written(tcon, total_written);
	return total_written ? total_written : (ssize_t)rc;
}
2414
2415 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2416                                 unsigned long nr_segs, loff_t pos)
2417 {
2418         ssize_t written;
2419         struct inode *inode;
2420
2421         inode = iocb->ki_filp->f_path.dentry->d_inode;
2422
2423         /*
2424          * BB - optimize the way when signing is disabled. We can drop this
2425          * extra memory-to-memory copying and use iovec buffers for constructing
2426          * write request.
2427          */
2428
2429         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2430         if (written > 0) {
2431                 CIFS_I(inode)->invalid_mapping = true;
2432                 iocb->ki_pos = pos;
2433         }
2434
2435         return written;
2436 }
2437
/*
 * cifs_writev - buffered write for files that may carry mandatory
 * byte-range locks.
 *
 * Takes cinode->lock_sem for read so no conflicting brlock can be added
 * while the write is in flight, then performs the write under i_mutex.
 * Returns the number of bytes written, or -EACCES when a conflicting
 * exclusive lock covers the target range.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, const struct iovec *iov,
            unsigned long nr_segs, loff_t pos)
{
        struct file *file = iocb->ki_filp;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct inode *inode = file->f_mapping->host;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        ssize_t rc = -EACCES;

        BUG_ON(iocb->ki_pos != pos);

        sb_start_write(inode->i_sb);

        /*
         * We need to hold the sem to be sure nobody modifies lock list
         * with a brlock that prevents writing.
         */
        down_read(&cinode->lock_sem);
        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
                                     server->vals->exclusive_lock_type, NULL,
                                     true)) {
                mutex_lock(&inode->i_mutex);
                rc = __generic_file_aio_write(iocb, iov, nr_segs,
                                               &iocb->ki_pos);
                mutex_unlock(&inode->i_mutex);
        }

        if (rc > 0 || rc == -EIOCBQUEUED) {
                ssize_t err;

                /* let generic_write_sync() handle any O_SYNC semantics */
                err = generic_write_sync(file, pos, rc);
                if (err < 0 && rc > 0)
                        rc = err;
        }

        up_read(&cinode->lock_sem);
        sb_end_write(inode->i_sb);
        return rc;
}
2479
/*
 * cifs_strict_writev - strict cache mode write entry point.
 *
 * Chooses a write path from the oplock state: with only a read oplock
 * (SMB2 builds) write through the page cache and push the data out; with
 * no exclusive oplock do an uncached write; with an exclusive oplock use
 * the cached paths (skipping brlock conflict checks when POSIX byte-range
 * locks are in effect).
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
                   unsigned long nr_segs, loff_t pos)
{
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
                                                iocb->ki_filp->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);

#ifdef CONFIG_CIFS_SMB2
        /*
         * If we have an oplock for read and want to write a data to the file
         * we need to store it in the page cache and then push it to the server
         * to be sure the next read will get a valid data.
         */
        if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) {
                ssize_t written;
                int rc;

                written = generic_file_aio_write(iocb, iov, nr_segs, pos);
                rc = filemap_fdatawrite(inode->i_mapping);
                if (rc)
                        return (ssize_t)rc;

                return written;
        }
#endif

        /*
         * For non-oplocked files in strict cache mode we need to write the data
         * to the server exactly from the pos to pos+len-1 rather than flush all
         * affected pages because it may cause a error with mandatory locks on
         * these pages but not on the region from pos to ppos+len-1.
         */

        if (!cinode->clientCanCacheAll)
                return cifs_user_writev(iocb, iov, nr_segs, pos);

        /* POSIX byte-range lock semantics: use the generic cached write */
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return generic_file_aio_write(iocb, iov, nr_segs, pos);

        return cifs_writev(iocb, iov, nr_segs, pos);
}
2527
2528 static struct cifs_readdata *
2529 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2530 {
2531         struct cifs_readdata *rdata;
2532
2533         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2534                         GFP_KERNEL);
2535         if (rdata != NULL) {
2536                 kref_init(&rdata->refcount);
2537                 INIT_LIST_HEAD(&rdata->list);
2538                 init_completion(&rdata->done);
2539                 INIT_WORK(&rdata->work, complete);
2540         }
2541
2542         return rdata;
2543 }
2544
2545 void
2546 cifs_readdata_release(struct kref *refcount)
2547 {
2548         struct cifs_readdata *rdata = container_of(refcount,
2549                                         struct cifs_readdata, refcount);
2550
2551         if (rdata->cfile)
2552                 cifsFileInfo_put(rdata->cfile);
2553
2554         kfree(rdata);
2555 }
2556
2557 static int
2558 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2559 {
2560         int rc = 0;
2561         struct page *page;
2562         unsigned int i;
2563
2564         for (i = 0; i < nr_pages; i++) {
2565                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2566                 if (!page) {
2567                         rc = -ENOMEM;
2568                         break;
2569                 }
2570                 rdata->pages[i] = page;
2571         }
2572
2573         if (rc) {
2574                 for (i = 0; i < nr_pages; i++) {
2575                         put_page(rdata->pages[i]);
2576                         rdata->pages[i] = NULL;
2577                 }
2578         }
2579         return rc;
2580 }
2581
2582 static void
2583 cifs_uncached_readdata_release(struct kref *refcount)
2584 {
2585         struct cifs_readdata *rdata = container_of(refcount,
2586                                         struct cifs_readdata, refcount);
2587         unsigned int i;
2588
2589         for (i = 0; i < rdata->nr_pages; i++) {
2590                 put_page(rdata->pages[i]);
2591                 rdata->pages[i] = NULL;
2592         }
2593         cifs_readdata_release(refcount);
2594 }
2595
2596 static int
2597 cifs_retry_async_readv(struct cifs_readdata *rdata)
2598 {
2599         int rc;
2600         struct TCP_Server_Info *server;
2601
2602         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2603
2604         do {
2605                 if (rdata->cfile->invalidHandle) {
2606                         rc = cifs_reopen_file(rdata->cfile, true);
2607                         if (rc != 0)
2608                                 continue;
2609                 }
2610                 rc = server->ops->async_readv(rdata);
2611         } while (rc == -EAGAIN);
2612
2613         return rc;
2614 }
2615
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iov:        vector in which we should copy the data
 * @nr_segs:    number of segments in vector
 * @offset:     offset into file of the first iovec
 * @copied:     used to return the amount of data copied to the iov
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 *
 * Returns 0 on success or the error from memcpy_toiovecend(). Once an
 * error occurs, the remaining pages are skipped, but *copied still
 * reflects the bytes copied before the failure.
 */
static ssize_t
cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
                        unsigned long nr_segs, loff_t offset, ssize_t *copied)
{
        int rc = 0;
        struct iov_iter ii;
        size_t pos = rdata->offset - offset; /* assumes rdata->offset >= offset */
        ssize_t remaining = rdata->bytes;
        unsigned char *pdata;
        unsigned int i;

        /* set up iov_iter and advance to the correct offset */
        iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
        iov_iter_advance(&ii, pos);

        *copied = 0;
        for (i = 0; i < rdata->nr_pages; i++) {
                ssize_t copy;
                struct page *page = rdata->pages[i];

                /* copy a whole page or whatever's left */
                copy = min_t(ssize_t, remaining, PAGE_SIZE);

                /* ...but limit it to whatever space is left in the iov */
                copy = min_t(ssize_t, copy, iov_iter_count(&ii));

                /* go while there's data to be copied and no errors */
                if (copy && !rc) {
                        pdata = kmap(page);
                        rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
                                                (int)copy);
                        kunmap(page);
                        if (!rc) {
                                *copied += copy;
                                remaining -= copy;
                                iov_iter_advance(&ii, copy);
                        }
                }
        }

        return rc;
}
2670
2671 static void
2672 cifs_uncached_readv_complete(struct work_struct *work)
2673 {
2674         struct cifs_readdata *rdata = container_of(work,
2675                                                 struct cifs_readdata, work);
2676
2677         complete(&rdata->done);
2678         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2679 }
2680
/*
 * cifs_uncached_read_into_pages - receive read response data from the
 * socket directly into rdata's pages.
 *
 * @len is the number of data bytes in the response. A partial final page
 * is zero-filled past @len and its data length recorded in rdata->tailsz;
 * pages beyond the data are released and rdata->nr_pages shrunk to match.
 * Returns the number of bytes read, or the socket error if nothing was
 * read.
 */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
{
        int total_read = 0, result = 0;
        unsigned int i;
        unsigned int nr_pages = rdata->nr_pages;
        struct kvec iov;

        rdata->tailsz = PAGE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];

                if (len >= PAGE_SIZE) {
                        /* enough data to fill the page */
                        iov.iov_base = kmap(page);
                        iov.iov_len = PAGE_SIZE;
                        cFYI(1, "%u: iov_base=%p iov_len=%zu",
                                i, iov.iov_base, iov.iov_len);
                        len -= PAGE_SIZE;
                } else if (len > 0) {
                        /* enough for partial page, fill and zero the rest */
                        iov.iov_base = kmap(page);
                        iov.iov_len = len;
                        cFYI(1, "%u: iov_base=%p iov_len=%zu",
                                i, iov.iov_base, iov.iov_len);
                        memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
                        rdata->tailsz = len;
                        len = 0;
                } else {
                        /* no need to hold page hostage */
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        put_page(page);
                        continue;
                }

                /* page stays kmapped only for the duration of the receive */
                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
                kunmap(page);
                if (result < 0)
                        break;

                total_read += result;
        }

        return total_read > 0 ? total_read : result;
}
2728
2729 static ssize_t
2730 cifs_iovec_read(struct file *file, const struct iovec *iov,
2731                  unsigned long nr_segs, loff_t *poffset)
2732 {
2733         ssize_t rc;
2734         size_t len, cur_len;
2735         ssize_t total_read = 0;
2736         loff_t offset = *poffset;
2737         unsigned int npages;
2738         struct cifs_sb_info *cifs_sb;
2739         struct cifs_tcon *tcon;
2740         struct cifsFileInfo *open_file;
2741         struct cifs_readdata *rdata, *tmp;
2742         struct list_head rdata_list;
2743         pid_t pid;
2744
2745         if (!nr_segs)
2746                 return 0;
2747
2748         len = iov_length(iov, nr_segs);
2749         if (!len)
2750                 return 0;
2751
2752         INIT_LIST_HEAD(&rdata_list);
2753         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2754         open_file = file->private_data;
2755         tcon = tlink_tcon(open_file->tlink);
2756
2757         if (!tcon->ses->server->ops->async_readv)
2758                 return -ENOSYS;
2759
2760         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2761                 pid = open_file->pid;
2762         else
2763                 pid = current->tgid;
2764
2765         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2766                 cFYI(1, "attempting read on write only file instance");
2767
2768         do {
2769                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2770                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2771
2772                 /* allocate a readdata struct */
2773                 rdata = cifs_readdata_alloc(npages,
2774                                             cifs_uncached_readv_complete);
2775                 if (!rdata) {
2776                         rc = -ENOMEM;
2777                         goto error;
2778                 }
2779
2780                 rc = cifs_read_allocate_pages(rdata, npages);
2781                 if (rc)
2782                         goto error;
2783
2784                 rdata->cfile = cifsFileInfo_get(open_file);
2785                 rdata->nr_pages = npages;
2786                 rdata->offset = offset;
2787                 rdata->bytes = cur_len;
2788                 rdata->pid = pid;
2789                 rdata->pagesz = PAGE_SIZE;
2790                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2791
2792                 rc = cifs_retry_async_readv(rdata);
2793 error:
2794                 if (rc) {
2795                         kref_put(&rdata->refcount,
2796                                  cifs_uncached_readdata_release);
2797                         break;
2798                 }
2799
2800                 list_add_tail(&rdata->list, &rdata_list);
2801                 offset += cur_len;
2802                 len -= cur_len;
2803         } while (len > 0);
2804
2805         /* if at least one read request send succeeded, then reset rc */
2806         if (!list_empty(&rdata_list))
2807                 rc = 0;
2808
2809         /* the loop below should proceed in the order of increasing offsets */
2810 restart_loop:
2811         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2812                 if (!rc) {
2813                         ssize_t copied;
2814
2815                         /* FIXME: freezable sleep too? */
2816                         rc = wait_for_completion_killable(&rdata->done);
2817                         if (rc)
2818                                 rc = -EINTR;
2819                         else if (rdata->result)
2820                                 rc = rdata->result;
2821                         else {
2822                                 rc = cifs_readdata_to_iov(rdata, iov,
2823                                                         nr_segs, *poffset,
2824                                                         &copied);
2825                                 total_read += copied;
2826                         }
2827
2828                         /* resend call if it's a retryable error */
2829                         if (rc == -EAGAIN) {
2830                                 rc = cifs_retry_async_readv(rdata);
2831                                 goto restart_loop;
2832                         }
2833                 }
2834                 list_del_init(&rdata->list);
2835                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2836         }
2837
2838         cifs_stats_bytes_read(tcon, total_read);
2839         *poffset += total_read;
2840
2841         /* mask nodata case */
2842         if (rc == -ENODATA)
2843                 rc = 0;
2844
2845         return total_read ? total_read : rc;
2846 }
2847
2848 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2849                                unsigned long nr_segs, loff_t pos)
2850 {
2851         ssize_t read;
2852
2853         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2854         if (read > 0)
2855                 iocb->ki_pos = pos;
2856
2857         return read;
2858 }
2859
/*
 * cifs_strict_readv - strict cache mode read entry point.
 *
 * Uses the page cache only when we hold a read (level II) oplock, or when
 * POSIX byte-range locking is in effect; otherwise reads uncached from
 * the server. Cached reads are done under lock_sem after checking for a
 * conflicting brlock; -EACCES is returned when one exists.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
                  unsigned long nr_segs, loff_t pos)
{
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
                                                iocb->ki_filp->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = -EACCES;

        /*
         * In strict cache mode we need to read from the server all the time
         * if we don't have level II oplock because the server can delay mtime
         * change - so we can't make a decision about inode invalidating.
         * And we can also fail with pagereading if there are mandatory locks
         * on pages affected by this read but not on the region from pos to
         * pos+len-1.
         */
        if (!cinode->clientCanCacheRead)
                return cifs_user_readv(iocb, iov, nr_segs, pos);

        /* POSIX byte-range lock semantics: safe to use the generic path */
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return generic_file_aio_read(iocb, iov, nr_segs, pos);

        /*
         * We need to hold the sem to be sure nobody modifies lock list
         * with a brlock that prevents reading.
         */
        down_read(&cinode->lock_sem);
        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
                                     tcon->ses->server->vals->shared_lock_type,
                                     NULL, true))
                rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
        up_read(&cinode->lock_sem);
        return rc;
}
2900
/*
 * cifs_read - synchronous read into a kernel buffer.
 *
 * Reads up to @read_size bytes from @*offset into @read_data using the
 * server's sync_read op, retrying with a reopened handle on -EAGAIN.
 * Advances *offset as data arrives. Returns the total bytes read; if the
 * very first chunk fails, returns that error instead.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
        int rc = -EACCES;
        unsigned int bytes_read = 0;
        unsigned int total_read;
        unsigned int current_read_size;
        unsigned int rsize;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        char *cur_offset;
        struct cifsFileInfo *open_file;
        struct cifs_io_parms io_parms;
        int buf_type = CIFS_NO_BUFFER;
        __u32 pid;

        xid = get_xid();
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

        /* FIXME: set up handlers for larger reads and/or convert to async */
        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

        if (file->private_data == NULL) {
                rc = -EBADF;
                free_xid(xid);
                return rc;
        }
        open_file = file->private_data;
        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_read) {
                free_xid(xid);
                return -ENOSYS;
        }

        /* forward the opener's pid when rwpidforward is mounted */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cFYI(1, "attempting read on write only file instance");

        for (total_read = 0, cur_offset = read_data; read_size > total_read;
             total_read += bytes_read, cur_offset += bytes_read) {
                current_read_size = min_t(uint, read_size - total_read, rsize);
                /*
                 * For windows me and 9x we do not want to request more than it
                 * negotiated since it will refuse the read then.
                 */
                if ((tcon->ses) && !(tcon->ses->capabilities &
                                tcon->ses->server->vals->cap_large_files)) {
                        current_read_size = min_t(uint, current_read_size,
                                        CIFSMaxBufSize);
                }
                rc = -EAGAIN;
                /* retry the chunk, reopening the file if the handle is bad */
                while (rc == -EAGAIN) {
                        if (open_file->invalidHandle) {
                                rc = cifs_reopen_file(open_file, true);
                                if (rc != 0)
                                        break;
                        }
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = current_read_size;
                        rc = server->ops->sync_read(xid, open_file, &io_parms,
                                                    &bytes_read, &cur_offset,
                                                    &buf_type);
                }
                if (rc || (bytes_read == 0)) {
                        if (total_read) {
                                /* partial success: report what we got */
                                break;
                        } else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        cifs_stats_bytes_read(tcon, total_read);
                        *offset += bytes_read;
                }
        }
        free_xid(xid);
        return total_read;
}
2989
/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *page = vmf->page;

        /* returning VM_FAULT_LOCKED tells the caller we left it locked */
        lock_page(page);
        return VM_FAULT_LOCKED;
}
3002
3003 static struct vm_operations_struct cifs_file_vm_ops = {
3004         .fault = filemap_fault,
3005         .page_mkwrite = cifs_page_mkwrite,
3006         .remap_pages = generic_file_remap_pages,
3007 };
3008
3009 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3010 {
3011         int rc, xid;
3012         struct inode *inode = file->f_path.dentry->d_inode;
3013
3014         xid = get_xid();
3015
3016         if (!CIFS_I(inode)->clientCanCacheRead) {
3017                 rc = cifs_invalidate_mapping(inode);
3018                 if (rc)
3019                         return rc;
3020         }
3021
3022         rc = generic_file_mmap(file, vma);
3023         if (rc == 0)
3024                 vma->vm_ops = &cifs_file_vm_ops;
3025         free_xid(xid);
3026         return rc;
3027 }
3028
3029 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3030 {
3031         int rc, xid;
3032
3033         xid = get_xid();
3034         rc = cifs_revalidate_file(file);
3035         if (rc) {
3036                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3037                 free_xid(xid);
3038                 return rc;
3039         }
3040         rc = generic_file_mmap(file, vma);
3041         if (rc == 0)
3042                 vma->vm_ops = &cifs_file_vm_ops;
3043         free_xid(xid);
3044         return rc;
3045 }
3046
/*
 * cifs_readv_complete - work handler run when a readpages async read
 * finishes: puts the pages on the file LRU, marks them uptodate on
 * success, unlocks them, pushes successful reads into fscache, and drops
 * the page and readdata references.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
        unsigned int i;
        struct cifs_readdata *rdata = container_of(work,
                                                struct cifs_readdata, work);

        for (i = 0; i < rdata->nr_pages; i++) {
                struct page *page = rdata->pages[i];

                lru_cache_add_file(page);

                if (rdata->result == 0) {
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                }

                /* unlock before the fscache store; waiters may proceed */
                unlock_page(page);

                if (rdata->result == 0)
                        cifs_readpage_to_fscache(rdata->mapping->host, page);

                page_cache_release(page);
                rdata->pages[i] = NULL;
        }
        kref_put(&rdata->refcount, cifs_readdata_release);
}
3074
/*
 * cifs_readpages_read_into_pages - receive readpages response data from
 * the socket into the pagecache pages held by rdata.
 *
 * @len is the number of data bytes in the response. A partial trailing
 * page is zero-filled past @len and recorded in rdata->tailsz. Pages with
 * no data are released, except pages beyond the server's known EOF, which
 * are zero-filled and marked uptodate so the VFS stops re-requesting
 * them. Returns bytes read, or the socket error if nothing was read.
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
{
        int total_read = 0, result = 0;
        unsigned int i;
        u64 eof;
        pgoff_t eof_index;
        unsigned int nr_pages = rdata->nr_pages;
        struct kvec iov;

        /* determine the eof that the server (probably) has */
        eof = CIFS_I(rdata->mapping->host)->server_eof;
        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
        cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);

        rdata->tailsz = PAGE_CACHE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];

                if (len >= PAGE_CACHE_SIZE) {
                        /* enough data to fill the page */
                        iov.iov_base = kmap(page);
                        iov.iov_len = PAGE_CACHE_SIZE;
                        cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
                                i, page->index, iov.iov_base, iov.iov_len);
                        len -= PAGE_CACHE_SIZE;
                } else if (len > 0) {
                        /* enough for partial page, fill and zero the rest */
                        iov.iov_base = kmap(page);
                        iov.iov_len = len;
                        cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
                                i, page->index, iov.iov_base, iov.iov_len);
                        memset(iov.iov_base + len,
                                '\0', PAGE_CACHE_SIZE - len);
                        rdata->tailsz = len;
                        len = 0;
                } else if (page->index > eof_index) {
                        /*
                         * The VFS will not try to do readahead past the
                         * i_size, but it's possible that we have outstanding
                         * writes with gaps in the middle and the i_size hasn't
                         * caught up yet. Populate those with zeroed out pages
                         * to prevent the VFS from repeatedly attempting to
                         * fill them until the writes are flushed.
                         */
                        zero_user(page, 0, PAGE_CACHE_SIZE);
                        lru_cache_add_file(page);
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                        unlock_page(page);
                        page_cache_release(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                } else {
                        /* no need to hold page hostage */
                        lru_cache_add_file(page);
                        unlock_page(page);
                        page_cache_release(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                }

                /* page stays kmapped only for the duration of the receive */
                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
                kunmap(page);
                if (result < 0)
                        break;

                total_read += result;
        }

        return total_read > 0 ? total_read : result;
}
3150
/*
 * ->readpages() implementation: batch contiguous pages from page_list into
 * async read requests of up to rsize bytes each and submit them.
 *
 * Returns 0 on success, or the first error hit; pages never submitted are
 * left on page_list for the VFS to fall back on with ->readpage().
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	unsigned int rsize = cifs_sb->rsize;
	pid_t pid;

	/*
	 * Give up immediately if rsize is too small to read an entire page.
	 * The VFS will fall back to readpage. We should never reach this
	 * point however since we set ra_pages to 0 when the rsize is smaller
	 * than a cache page.
	 */
	if (unlikely(rsize < PAGE_CACHE_SIZE))
		return 0;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	/* pid to stamp on the wire request, per mount option */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	INIT_LIST_HEAD(&tmplist);

	cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
		mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i;
		unsigned int bytes = PAGE_CACHE_SIZE;
		unsigned int expected_index;
		unsigned int nr_pages = 1;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;

		page = list_entry(page_list->prev, struct page, lru);

		/*
		 * Lock the page and put it in the cache. Since no one else
		 * should have access to this page, we're safe to simply set
		 * PG_locked without checking it first.
		 */
		__set_page_locked(page);
		rc = add_to_page_cache_locked(page, mapping,
					      page->index, GFP_KERNEL);

		/* give up if we can't stick it in the cache */
		if (rc) {
			__clear_page_locked(page);
			break;
		}

		/* move first page to the tmplist */
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
		list_move_tail(&page->lru, &tmplist);

		/* now try and add more pages onto the request */
		expected_index = page->index + 1;
		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
			/* discontinuity ? */
			if (page->index != expected_index)
				break;

			/* would this page push the read over the rsize? */
			if (bytes + PAGE_CACHE_SIZE > rsize)
				break;

			__set_page_locked(page);
			if (add_to_page_cache_locked(page, mapping,
						page->index, GFP_KERNEL)) {
				__clear_page_locked(page);
				break;
			}
			list_move_tail(&page->lru, &tmplist);
			bytes += PAGE_CACHE_SIZE;
			expected_index++;
			nr_pages++;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			break;
		}

		/* rdata holds its own reference on the open file */
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_CACHE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;

		/* tmplist is in increasing index order; keep that ordering */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = cifs_retry_async_readv(rdata);
		if (rc != 0) {
			/* send failed: release the pages we had claimed */
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our ref; completion handler holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	return rc;
}
3297
/*
 * Fill @page with file data starting at *poffset, trying fscache first and
 * falling back to a synchronous read from the server.
 *
 * On success, zero-fills any tail past what was read, marks the page
 * uptodate, pushes it to fscache and returns 0.  On read error, returns the
 * negative rc and leaves the page not uptodate.  The page stays locked in
 * all cases; the caller is responsible for unlocking it.
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
	if (rc == 0)
		goto read_complete;

	/* balanced by kunmap/page_cache_release at io_error below */
	page_cache_get(page);
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cFYI(1, "Bytes read %d", rc);

	/* update atime locally since the read bypassed generic VFS paths */
	file->f_path.dentry->d_inode->i_atime =
		current_fs_time(file->f_path.dentry->d_inode->i_sb);

	/* short read (EOF): zero the remainder of the page */
	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);

	rc = 0;

io_error:
	kunmap(page);
	page_cache_release(page);

read_complete:
	return rc;
}
3341
3342 static int cifs_readpage(struct file *file, struct page *page)
3343 {
3344         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3345         int rc = -EACCES;
3346         unsigned int xid;
3347
3348         xid = get_xid();
3349
3350         if (file->private_data == NULL) {
3351                 rc = -EBADF;
3352                 free_xid(xid);
3353                 return rc;
3354         }
3355
3356         cFYI(1, "readpage %p at offset %d 0x%x",
3357                  page, (int)offset, (int)offset);
3358
3359         rc = cifs_readpage_worker(file, page, &offset);
3360
3361         unlock_page(page);
3362
3363         free_xid(xid);
3364         return rc;
3365 }
3366
3367 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3368 {
3369         struct cifsFileInfo *open_file;
3370
3371         spin_lock(&cifs_file_list_lock);
3372         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3373                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3374                         spin_unlock(&cifs_file_list_lock);
3375                         return 1;
3376                 }
3377         }
3378         spin_unlock(&cifs_file_list_lock);
3379         return 0;
3380 }
3381
/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file.  In the
 * future we could consider allowing the inode to be refreshed only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
3388 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3389 {
3390         if (!cifsInode)
3391                 return true;
3392
3393         if (is_inode_writable(cifsInode)) {
3394                 /* This inode is open for write at least once */
3395                 struct cifs_sb_info *cifs_sb;
3396
3397                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3398                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3399                         /* since no page cache to corrupt on directio
3400                         we can change size safely */
3401                         return true;
3402                 }
3403
3404                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3405                         return true;
3406
3407                 return false;
3408         } else
3409                 return true;
3410 }
3411
/*
 * ->write_begin() implementation: grab (or create) the page cache page
 * covering [pos, pos+len) and decide whether it must be read from the
 * server first.
 *
 * Reading is skipped when the page is already uptodate, when the write
 * covers the whole page, or (with a read oplock) when the page lies at or
 * beyond EOF so the existing data will never be used.  Returns 0 with the
 * locked page in *pagep, or -ENOMEM if no page could be obtained.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cFYI(1, "write_begin from %lld len %d", (long long)pos, len);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_I(mapping->host)->clientCanCacheRead) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			/* zero the parts of the page we will not write */
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
3483
3484 static int cifs_release_page(struct page *page, gfp_t gfp)
3485 {
3486         if (PagePrivate(page))
3487                 return 0;
3488
3489         return cifs_fscache_release_page(page, gfp);
3490 }
3491
3492 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3493 {
3494         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3495
3496         if (offset == 0)
3497                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3498 }
3499
3500 static int cifs_launder_page(struct page *page)
3501 {
3502         int rc = 0;
3503         loff_t range_start = page_offset(page);
3504         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3505         struct writeback_control wbc = {
3506                 .sync_mode = WB_SYNC_ALL,
3507                 .nr_to_write = 0,
3508                 .range_start = range_start,
3509                 .range_end = range_end,
3510         };
3511
3512         cFYI(1, "Launder page: %p", page);
3513
3514         if (clear_page_dirty_for_io(page))
3515                 rc = cifs_writepage_locked(page, &wbc);
3516
3517         cifs_fscache_invalidate_page(page, page->mapping->host);
3518         return rc;
3519 }
3520
/*
 * Workqueue handler run when the server breaks our oplock.  Order matters:
 * break any local lease, flush dirty data, (if we lost read caching)
 * wait for the flush and invalidate cached pages, push cached byte-range
 * locks to the server, and finally acknowledge the break to the server.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	if (inode && S_ISREG(inode->i_mode)) {
		/* break local leases to match the oplock level we keep */
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (cinode->clientCanCacheRead == 0) {
			/* lost read caching: wait for writeback and drop
			   now-stale cached pages */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			invalidate_remote_inode(inode);
		}
		cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cERROR(1, "Push locks rc = %d", rc);

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cFYI(1, "Oplock release rc = %d", rc);
	}
}
3560
/*
 * Address space operations used when the server's buffers are large enough
 * for cifs_readpages (see the cifs_addr_ops_smallbuf variant below for the
 * small-buffer case, which differs only in omitting .readpages).
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
3573
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 *
 * Identical to cifs_addr_ops above except that .readpages is omitted.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};