2 * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
9 #include <linux/miscdevice.h>
10 #include <linux/init.h>
11 #include <linux/wait.h>
12 #include <linux/module.h>
13 #include <linux/file.h>
15 #include <linux/poll.h>
16 #include <linux/signal.h>
17 #include <linux/spinlock.h>
18 #include <linux/dlm.h>
19 #include <linux/dlm_device.h>
21 #include "dlm_internal.h"
22 #include "lockspace.h"
24 #include "lvb_table.h"
/* Prefix for per-lockspace misc device names; create_misc_device() formats
   the name as "<name_prefix>_<lockspace name>". */
27 static const char *name_prefix="dlm";
/* Misc device for the control node; registered in dlm_user_init() and
   deregistered in dlm_user_exit(). */
28 static struct miscdevice ctl_device;
/* Forward declaration so create_misc_device() can reference the table
   before its definition further down. */
29 static const struct file_operations device_fops;
/* Lock parameters as laid out inside a struct dlm_write_request32; read by
   compat_input() through kb32->i.lock.  Presumably the layout used by
   32-bit callers -- only the lvb member is visible here. */
33 struct dlm_lock_params32 {
/* Lock value block payload; compat_input() copies DLM_USER_LVB_LEN bytes. */
47 char lvb[DLM_USER_LVB_LEN];
/* Compat counterpart of struct dlm_write_request, converted to the native
   form by compat_input().  The members below are the per-command payloads
   that compat_input() reaches as i.lock, i.lspace and i.purge. */
51 struct dlm_write_request32 {
58 struct dlm_lock_params32 lock;
59 struct dlm_lspace_params lspace;
60 struct dlm_purge_params purge;
/* Compat counterpart of struct dlm_lock_result, filled in by
   compat_output() before being copied to the reader. */
71 struct dlm_lock_result32 {
/* Lock status block in compat form; compat_output() sets its sb_status,
   sb_flags, sb_lkid and sb_lvbptr members. */
77 struct dlm_lksb32 lksb;
80 /* Offsets may be zero if no data is present */
/* Translate a write request from the compat layout (kb32) into the native
   layout (kb).

   kb   - destination request in native layout
   kb32 - source request as received from the caller

   The version triple and the is64bit flag are copied first.  The payload
   that follows is chosen by kb->cmd: lockspace parameters for
   create/remove, purge parameters for purge, and the remaining assignments
   copy lock parameters (presumably the fall-through case).  Fields that are
   pointers in kb are widened from kb32 with (void *)(long).

   NOTE(review): kb->cmd is tested below but no assignment to it is visible
   in this function -- confirm it is copied from kb32 before the test. */
84 static void compat_input(struct dlm_write_request *kb,
85 struct dlm_write_request32 *kb32)
87 kb->version[0] = kb32->version[0];
88 kb->version[1] = kb32->version[1];
89 kb->version[2] = kb32->version[2];
92 kb->is64bit = kb32->is64bit;
93 if (kb->cmd == DLM_USER_CREATE_LOCKSPACE ||
94 kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
95 kb->i.lspace.flags = kb32->i.lspace.flags;
96 kb->i.lspace.minor = kb32->i.lspace.minor;
/* NOTE(review): strcpy() is unbounded; this relies on the name being
   NUL-terminated inside the source buffer -- confirm. */
97 strcpy(kb->i.lspace.name, kb32->i.lspace.name);
98 } else if (kb->cmd == DLM_USER_PURGE) {
99 kb->i.purge.nodeid = kb32->i.purge.nodeid;
100 kb->i.purge.pid = kb32->i.purge.pid;
102 kb->i.lock.mode = kb32->i.lock.mode;
103 kb->i.lock.namelen = kb32->i.lock.namelen;
104 kb->i.lock.flags = kb32->i.lock.flags;
105 kb->i.lock.lkid = kb32->i.lock.lkid;
106 kb->i.lock.parent = kb32->i.lock.parent;
107 kb->i.lock.xid = kb32->i.lock.xid;
108 kb->i.lock.timeout = kb32->i.lock.timeout;
/* Callback addresses/parameters and the lksb pointer are widened to
   native pointer width. */
109 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
110 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
111 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
112 kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
113 kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
114 memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
/* NOTE(review): the copy length is the request's own namelen -- confirm
   it is validated against the buffer size elsewhere. */
115 memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen);
/* Translate a lock result from the native layout (res) into the compat
   layout (res32).

   res   - source result in native layout
   res32 - destination result in compat layout

   Pointer-valued fields are narrowed with (__u32)(long); every other
   field is assigned one to one. */
119 static void compat_output(struct dlm_lock_result *res,
120 struct dlm_lock_result32 *res32)
122 res32->version[0] = res->version[0];
123 res32->version[1] = res->version[1];
124 res32->version[2] = res->version[2];
/* User-space callback address, its parameter and the user's lksb pointer. */
126 res32->user_astaddr = (__u32)(long)res->user_astaddr;
127 res32->user_astparam = (__u32)(long)res->user_astparam;
128 res32->user_lksb = (__u32)(long)res->user_lksb;
129 res32->bast_mode = res->bast_mode;
131 res32->lvb_offset = res->lvb_offset;
132 res32->length = res->length;
/* Lock status block, field by field. */
134 res32->lksb.sb_status = res->lksb.sb_status;
135 res32->lksb.sb_flags = res->lksb.sb_flags;
136 res32->lksb.sb_lkid = res->lksb.sb_lkid;
137 res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr;
141 /* Figure out if this lock is at the end of its life and no longer
142 available for the application to use. The lkb still exists until
143 the final ast is read. A lock becomes EOL in three situations:
144 1. a noqueue request fails with EAGAIN
145 2. an unlock completes with EUNLOCK
146 3. a cancel of a waiting request completes with ECANCEL/EDEADLK
147 An EOL lock needs to be removed from the process's list of locks.
148 And we can't allow any new operation on an EOL lock. This is
149 not related to the lifetime of the lkb struct which is managed
150 entirely by refcount. */
/* lkb       - lock being examined
   sb_status - status the decision is based on (see the list above)
   type      - kind of ast; compared against AST_COMP below

   Both visible tests look at whether lkb_grmode is DLM_LOCK_IV.
   NOTE(review): presumably DLM_LOCK_IV means no mode is currently
   granted -- confirm against the definition in linux/dlm.h. */
152 static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
159 if (lkb->lkb_grmode == DLM_LOCK_IV)
163 if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
170 /* we could possibly check if the cancel of an orphan has resulted in the lkb
171 being removed and then remove that lkb from the orphans list and free it */
/* Queue an ast of the given type on the owning process's ast list, where
   device_read() later picks it up.

   lkb  - lock the ast is for; its lkb_astparam holds the dlm_user_args
   type - ast kind (AST_COMP and AST_BAST are the values tested here)

   The work is done under ls->ls_clear_proc_locks, with proc->asts_spin
   held around the ast-list update and ua->proc->locks_spin held around
   removal of the lkb from its owner queue. */
173 void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
176 struct dlm_user_args *ua;
177 struct dlm_user_proc *proc;
178 int eol = 0, ast_type;
/* First test is made without the mutex; it is repeated once the mutex
   is held (see the comment below). */
180 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
183 ls = lkb->lkb_resource->res_ls;
184 mutex_lock(&ls->ls_clear_proc_locks);
186 /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
187 can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
188 lkb->ua so we can't try to use it. This second check is necessary
189 for cases where a completion ast is received for an operation that
190 began before clear_proc_locks did its cancel/unlock. */
192 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
195 DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb););
196 ua = (struct dlm_user_args *)lkb->lkb_astparam;
/* Blocking ast requested but no blocking-ast address was registered. */
199 if (type == AST_BAST && ua->bastaddr == NULL)
202 spin_lock(&proc->asts_spin);
/* Remember which ast types were already pending before adding this one. */
204 ast_type = lkb->lkb_ast_type;
205 lkb->lkb_ast_type |= type;
/* Reference held on behalf of the asts list; device_read() carries the
   matching comment about dropping it. */
208 kref_get(&lkb->lkb_ref);
209 list_add_tail(&lkb->lkb_astqueue, &proc->asts);
210 wake_up_interruptible(&proc->wait);
/* A completion ast arriving while one is still pending is only logged. */
212 if (type == AST_COMP && (ast_type & AST_COMP))
213 log_debug(ls, "ast overlap %x status %x %x",
214 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
216 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
/* Pending blocking ast is cleared and the lkb is flagged end-of-life. */
218 lkb->lkb_ast_type &= ~AST_BAST;
219 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
222 /* We want to copy the lvb to userspace when the completion
223 ast is read if the status is 0, the lock has an lvb and
224 lvb_ops says we should. We could probably have set_lvb_lock()
225 set update_user_lvb instead and not need old_mode */
227 if ((lkb->lkb_ast_type & AST_COMP) &&
228 (lkb->lkb_lksb->sb_status == 0) &&
229 lkb->lkb_lksb->sb_lvbptr &&
230 dlm_lvb_operations[ua->old_mode + 1][lkb->lkb_grmode + 1])
231 ua->update_user_lvb = 1;
233 ua->update_user_lvb = 0;
235 spin_unlock(&proc->asts_spin);
/* Take the lkb off its owner queue if it is still on one. */
238 spin_lock(&ua->proc->locks_spin);
239 if (!list_empty(&lkb->lkb_ownqueue)) {
240 list_del_init(&lkb->lkb_ownqueue);
243 spin_unlock(&ua->proc->locks_spin);
246 mutex_unlock(&ls->ls_clear_proc_locks);
/* Carry out a lock or convert request for the process behind proc.

   proc   - per-open state; supplies the lockspace handle
   params - request parameters taken from the write buffer

   Both a completion-ast address and an lksb pointer are required.  A
   zeroed dlm_user_args is allocated and filled from params, then the
   request goes to dlm_user_convert() when DLM_LKF_CONVERT is set and to
   dlm_user_request() for a new lock.  error is afterwards set to the lock
   id in ua->lksb.sb_lkid (the condition for that is not visible here), and
   the lockspace reference taken by the lookup is dropped at the end. */
249 static int device_user_lock(struct dlm_user_proc *proc,
250 struct dlm_lock_params *params)
253 struct dlm_user_args *ua;
256 ls = dlm_find_lockspace_local(proc->lockspace);
260 if (!params->castaddr || !params->lksb) {
265 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
/* Remember the caller's lksb and callback details for later delivery. */
269 ua->user_lksb = params->lksb;
270 ua->castparam = params->castparam;
271 ua->castaddr = params->castaddr;
272 ua->bastparam = params->bastparam;
273 ua->bastaddr = params->bastaddr;
274 ua->xid = params->xid;
276 if (params->flags & DLM_LKF_CONVERT)
277 error = dlm_user_convert(ls, ua,
278 params->mode, params->flags,
279 params->lkid, params->lvb,
280 (unsigned long) params->timeout);
282 error = dlm_user_request(ls, ua,
283 params->mode, params->flags,
284 params->name, params->namelen,
285 (unsigned long) params->timeout);
287 error = ua->lksb.sb_lkid;
290 dlm_put_lockspace(ls);
/* Carry out an unlock or cancel request for the process behind proc.

   proc   - per-open state; supplies the lockspace handle
   params - request parameters taken from the write buffer

   A zeroed dlm_user_args is allocated and given the caller's lksb and
   completion-ast details.  DLM_LKF_CANCEL selects dlm_user_cancel(); any
   other request goes to dlm_user_unlock().  The lockspace reference taken
   by the lookup is dropped at the end. */
294 static int device_user_unlock(struct dlm_user_proc *proc,
295 struct dlm_lock_params *params)
298 struct dlm_user_args *ua;
301 ls = dlm_find_lockspace_local(proc->lockspace);
305 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
309 ua->user_lksb = params->lksb;
310 ua->castparam = params->castparam;
311 ua->castaddr = params->castaddr;
313 if (params->flags & DLM_LKF_CANCEL)
314 error = dlm_user_cancel(ls, ua, params->flags, params->lkid);
316 error = dlm_user_unlock(ls, ua, params->flags, params->lkid,
319 dlm_put_lockspace(ls);
/* Register a misc device for lockspace ls, named "<name_prefix>_<name>".

   ls   - lockspace whose ls_device is set up and registered
   name - lockspace name used to build the device name

   The name buffer is sized for both strings plus the '_' separator and the
   terminating NUL.  The device uses device_fops and asks for a dynamically
   assigned minor.
   NOTE(review): the kfree() at the end is presumably the cleanup for a
   failed misc_register() -- the guarding test is not visible here. */
323 static int create_misc_device(struct dlm_ls *ls, char *name)
328 len = strlen(name) + strlen(name_prefix) + 2;
329 ls->ls_device.name = kzalloc(len, GFP_KERNEL);
330 if (!ls->ls_device.name)
333 snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
335 ls->ls_device.fops = &device_fops;
336 ls->ls_device.minor = MISC_DYNAMIC_MINOR;
338 error = misc_register(&ls->ls_device);
340 kfree(ls->ls_device.name);
/* Forward a purge request to dlm_user_purge() for the caller's lockspace.

   proc   - per-open state; supplies the lockspace handle
   params - purge parameters; nodeid and pid are passed through unchanged

   The lockspace reference taken by the lookup is dropped afterwards. */
346 static int device_user_purge(struct dlm_user_proc *proc,
347 struct dlm_purge_params *params)
352 ls = dlm_find_lockspace_local(proc->lockspace);
356 error = dlm_user_purge(ls, proc, params->nodeid, params->pid);
358 dlm_put_lockspace(ls);
/* Create a lockspace and the misc device that exposes it to user space.

   params - lockspace name and flags from the write request

   The caller must have CAP_SYS_ADMIN.  The lockspace is created with
   dlm_new_lockspace() using DLM_USER_LVB_LEN as the lvb length, looked up
   through the returned handle, and given a device by create_misc_device().
   dlm_release_lockspace(lockspace, 0) undoes the creation (presumably when
   the device could not be created), and error is set to the device's minor
   number (presumably on success) -- the selecting test is not visible. */
362 static int device_create_lockspace(struct dlm_lspace_params *params)
364 dlm_lockspace_t *lockspace;
368 if (!capable(CAP_SYS_ADMIN))
371 error = dlm_new_lockspace(params->name, strlen(params->name),
372 &lockspace, params->flags, DLM_USER_LVB_LEN);
376 ls = dlm_find_lockspace_local(lockspace);
380 error = create_misc_device(ls, params->name);
381 dlm_put_lockspace(ls);
384 dlm_release_lockspace(lockspace, 0);
386 error = ls->ls_device.minor;
/* Remove the lockspace identified by params->minor together with its
   misc device.

   params - minor number of the lockspace device, plus flags

   The caller must have CAP_SYS_ADMIN.  The lockspace is looked up by
   device minor, its misc device is deregistered and the device name freed,
   and the lockspace is then released through its local handle with
   dlm_release_lockspace().  DLM_USER_LSFLG_FORCEFREE in params->flags
   affects the force argument (the assignment itself is not visible here).
   create_misc_device() is called again at the end; per the comment in the
   body this is the recovery path for a failed release. */
391 static int device_remove_lockspace(struct dlm_lspace_params *params)
393 dlm_lockspace_t *lockspace;
395 int error, force = 0;
397 if (!capable(CAP_SYS_ADMIN))
400 ls = dlm_find_lockspace_device(params->minor);
404 /* Deregister the misc device first, so we don't have
405 * a device that's not attached to a lockspace. If
406 * dlm_release_lockspace fails then we can recreate it
408 error = misc_deregister(&ls->ls_device);
410 dlm_put_lockspace(ls);
413 kfree(ls->ls_device.name);
415 if (params->flags & DLM_USER_LSFLG_FORCEFREE)
418 lockspace = ls->ls_local_handle;
420 /* dlm_release_lockspace waits for references to go to zero,
421 so all processes will need to close their device for the ls
422 before the release will proceed */
424 dlm_put_lockspace(ls);
425 error = dlm_release_lockspace(lockspace, force);
427 create_misc_device(ls, ls->ls_name);
432 /* Check the user's version matches ours */
/* req - request whose version triple is checked

   A request is a mismatch when its major version differs from
   DLM_DEVICE_VERSION_MAJOR, or when the major matches but its minor
   version is greater than DLM_DEVICE_VERSION_MINOR; the patch level is
   not compared.  A mismatch is logged at KERN_DEBUG with the process
   name and id and both version triples. */
433 static int check_version(struct dlm_write_request *req)
435 if (req->version[0] != DLM_DEVICE_VERSION_MAJOR ||
436 (req->version[0] == DLM_DEVICE_VERSION_MAJOR &&
437 req->version[1] > DLM_DEVICE_VERSION_MINOR)) {
439 printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
440 "user (%d.%d.%d) kernel (%d.%d.%d)\n",
446 DLM_DEVICE_VERSION_MAJOR,
447 DLM_DEVICE_VERSION_MINOR,
448 DLM_DEVICE_VERSION_PATCH);
458 * dlm_user_request -> request_lock
459 * dlm_user_convert -> convert_lock
462 * dlm_user_unlock -> unlock_lock
463 * dlm_user_cancel -> cancel_lock
465 * device_create_lockspace
468 * device_remove_lockspace
469 * dlm_release_lockspace
472 /* a write to a lockspace device is a lock or unlock request, a write
473 to the control device is to create/remove a lockspace */
/* file  - open device; file->private_data is the dlm_user_proc, if any
   buf   - user buffer holding a struct dlm_write_request
   count - size of buf in bytes
   ppos  - not referenced in the visible code

   The request is copied into a kernel buffer and its version checked.  A
   request that is not marked is64bit is converted into a larger native
   buffer with compat_input(), and DLM_PROC_FLAGS_COMPAT is recorded on the
   proc.  All signals are blocked around the command dispatch and the
   previous mask is restored afterwards.
   NOTE(review): two minimum-size tests on count are visible (compat and
   native struct sizes); presumably only one is compiled in, depending on
   configuration -- confirm. */
475 static ssize_t device_write(struct file *file, const char __user *buf,
476 size_t count, loff_t *ppos)
478 struct dlm_user_proc *proc = file->private_data;
479 struct dlm_write_request *kbuf;
480 sigset_t tmpsig, allsigs;
484 if (count < sizeof(struct dlm_write_request32))
486 if (count < sizeof(struct dlm_write_request))
490 kbuf = kmalloc(count, GFP_KERNEL);
494 if (copy_from_user(kbuf, buf, count)) {
499 if (check_version(kbuf)) {
505 if (!kbuf->is64bit) {
506 struct dlm_write_request32 *k32buf;
/* Keep the raw copy as the compat view and allocate a native buffer that
   is larger by the size difference between the two request structs. */
507 k32buf = (struct dlm_write_request32 *)kbuf;
508 kbuf = kmalloc(count + (sizeof(struct dlm_write_request) -
509 sizeof(struct dlm_write_request32)), GFP_KERNEL);
514 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
515 compat_input(kbuf, k32buf);
520 /* do we really need this? can a write happen after a close? */
521 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
522 test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
525 sigfillset(&allsigs);
526 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
/* Command dispatch: lock, unlock and purge are passed the proc, while
   lockspace create/remove take only the lspace parameters. */
534 log_print("no locking on control device");
537 error = device_user_lock(proc, &kbuf->i.lock);
540 case DLM_USER_UNLOCK:
542 log_print("no locking on control device");
545 error = device_user_unlock(proc, &kbuf->i.lock);
548 case DLM_USER_CREATE_LOCKSPACE:
550 log_print("create/remove only on control device");
553 error = device_create_lockspace(&kbuf->i.lspace);
556 case DLM_USER_REMOVE_LOCKSPACE:
558 log_print("create/remove only on control device");
561 error = device_remove_lockspace(&kbuf->i.lspace);
566 log_print("no locking on control device");
569 error = device_user_purge(proc, &kbuf->i.purge);
573 log_print("Unknown command passed to DLM device : %d\n",
578 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
585 /* Every process that opens the lockspace device has its own "proc" structure
586 hanging off the open file that's used to keep track of locks owned by the
587 process and asts that need to be delivered to the process. */
/* inode - device inode; its minor number selects the lockspace
   file  - open file; receives the new proc in file->private_data

   The lockspace is looked up by device minor and a zeroed dlm_user_proc is
   allocated and initialised (lists, spinlocks, wait queue).  The lockspace
   reference from the lookup is kept for the lifetime of the open file;
   device_close() drops it. */
589 static int device_open(struct inode *inode, struct file *file)
591 struct dlm_user_proc *proc;
594 ls = dlm_find_lockspace_device(iminor(inode));
598 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
600 dlm_put_lockspace(ls);
604 proc->lockspace = ls->ls_local_handle;
605 INIT_LIST_HEAD(&proc->asts);
606 INIT_LIST_HEAD(&proc->locks);
607 INIT_LIST_HEAD(&proc->unlocking);
608 spin_lock_init(&proc->asts_spin);
609 spin_lock_init(&proc->locks_spin);
610 init_waitqueue_head(&proc->wait);
611 file->private_data = proc;
/* Release handler for a lockspace device.

   inode - not referenced in the visible code
   file  - open file whose private_data is the dlm_user_proc

   With all signals blocked, the proc is flagged DLM_PROC_FLAGS_CLOSING and
   its locks are cleared with dlm_clear_proc_locks().  private_data is then
   reset and two lockspace references are dropped: the one taken by the
   lookup here and the one held since device_open().  The previous signal
   mask is restored at the end. */
616 static int device_close(struct inode *inode, struct file *file)
618 struct dlm_user_proc *proc = file->private_data;
620 sigset_t tmpsig, allsigs;
622 ls = dlm_find_lockspace_local(proc->lockspace);
626 sigfillset(&allsigs);
627 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
629 set_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags);
631 dlm_clear_proc_locks(ls, proc);
633 /* at this point no more lkb's should exist for this lockspace,
634 so there's no chance of dlm_user_add_ast() being called and
635 looking for lkb->ua->proc */
638 file->private_data = NULL;
640 dlm_put_lockspace(ls);
641 dlm_put_lockspace(ls); /* for the find in device_open() */
643 /* FIXME: AUTOFREE: if this ls is no longer used do
644 device_remove_lockspace() */
646 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
/* Build a struct dlm_lock_result describing one ast and copy it to the
   reader's buffer.

   ua     - user args of the lock; source of the lksb and callback details
   compat - non-zero when the reader uses the compat layout (device_read()
            passes the proc's DLM_PROC_FLAGS_COMPAT bit)
   type   - ast kind; AST_BAST selects the blocking-ast address/parameter
   bmode  - mode reported in bast_mode for a blocking ast
   buf    - user buffer to fill
   count  - size of buf in bytes

   The result struct goes first; when the lvb is to be returned it is
   placed directly after the struct and lvb_offset records where. */
652 static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
653 int bmode, char __user *buf, size_t count)
656 struct dlm_lock_result32 result32;
658 struct dlm_lock_result result;
664 memset(&result, 0, sizeof(struct dlm_lock_result));
665 result.version[0] = DLM_DEVICE_VERSION_MAJOR;
666 result.version[1] = DLM_DEVICE_VERSION_MINOR;
667 result.version[2] = DLM_DEVICE_VERSION_PATCH;
668 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
669 result.user_lksb = ua->user_lksb;
671 /* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
672 in a conversion unless the conversion is successful. See code
673 in dlm_user_convert() for updating ua from ua_tmp. OpenVMS, though,
674 notes that a new blocking AST address and parameter are set even if
675 the conversion fails, so maybe we should just do that. */
677 if (type == AST_BAST) {
678 result.user_astaddr = ua->bastaddr;
679 result.user_astparam = ua->bastparam;
680 result.bast_mode = bmode;
682 result.user_astaddr = ua->castaddr;
683 result.user_astparam = ua->castparam;
/* len starts as the size of whichever result layout is in use. */
688 len = sizeof(struct dlm_lock_result32);
691 len = sizeof(struct dlm_lock_result);
694 /* copy lvb to userspace if there is one, it's been updated, and
695 the user buffer has space for it */
697 if (ua->update_user_lvb && ua->lksb.sb_lvbptr &&
698 count >= len + DLM_USER_LVB_LEN) {
699 if (copy_to_user(buf+len, ua->lksb.sb_lvbptr,
705 result.lvb_offset = len;
706 len += DLM_USER_LVB_LEN;
/* Compat readers get the narrowed copy produced by compat_output(). */
713 compat_output(&result, &result32);
714 resultptr = &result32;
718 if (copy_to_user(buf, resultptr, struct_len))
/* Copy the device interface version to user space.

   buf   - user buffer receiving a struct dlm_device_version
   count - size of buf; not referenced in the visible code

   Returns sizeof(struct dlm_device_version) once the copy has succeeded;
   the value returned when copy_to_user() fails is not visible here. */
726 static int copy_version_to_user(char __user *buf, size_t count)
728 struct dlm_device_version ver;
730 memset(&ver, 0, sizeof(struct dlm_device_version));
731 ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
732 ver.version[1] = DLM_DEVICE_VERSION_MINOR;
733 ver.version[2] = DLM_DEVICE_VERSION_PATCH;
735 if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
737 return sizeof(struct dlm_device_version);
740 /* a read returns a single ast described in a struct dlm_lock_result */
/* file  - open device; file->private_data is the dlm_user_proc, if any
   buf   - user buffer to fill
   count - size of buf in bytes

   A read of exactly sizeof(struct dlm_device_version) bytes is answered
   with the interface version.  Otherwise the first lkb on proc->asts is
   taken: a pending completion ast is reported before a pending blocking
   ast, and the lkb leaves the list only when no ast type remains set.
   When the list is empty the call fails straight away for O_NONBLOCK
   files and otherwise sleeps interruptibly on proc->wait. */
742 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
745 struct dlm_user_proc *proc = file->private_data;
747 struct dlm_user_args *ua;
748 DECLARE_WAITQUEUE(wait, current);
749 int error, type=0, bmode=0, removed = 0;
751 if (count == sizeof(struct dlm_device_version)) {
752 error = copy_version_to_user(buf, count);
757 log_print("non-version read from control device %zu", count);
/* NOTE(review): two minimum-size tests are visible (compat and native
   result sizes); presumably only one is compiled in -- confirm. */
762 if (count < sizeof(struct dlm_lock_result32))
764 if (count < sizeof(struct dlm_lock_result))
768 /* do we really need this? can a read happen after a close? */
769 if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
772 spin_lock(&proc->asts_spin);
773 if (list_empty(&proc->asts)) {
774 if (file->f_flags & O_NONBLOCK) {
775 spin_unlock(&proc->asts_spin);
779 add_wait_queue(&proc->wait, &wait);
/* asts_spin is released while waiting and re-taken before the list is
   examined again. */
782 set_current_state(TASK_INTERRUPTIBLE);
783 if (list_empty(&proc->asts) && !signal_pending(current)) {
784 spin_unlock(&proc->asts_spin);
786 spin_lock(&proc->asts_spin);
789 set_current_state(TASK_RUNNING);
790 remove_wait_queue(&proc->wait, &wait);
792 if (signal_pending(current)) {
793 spin_unlock(&proc->asts_spin);
798 /* there may be both completion and blocking asts to return for
799 the lkb, don't remove lkb from asts list unless no asts remain */
801 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
803 if (lkb->lkb_ast_type & AST_COMP) {
804 lkb->lkb_ast_type &= ~AST_COMP;
806 } else if (lkb->lkb_ast_type & AST_BAST) {
807 lkb->lkb_ast_type &= ~AST_BAST;
809 bmode = lkb->lkb_bastmode;
812 if (!lkb->lkb_ast_type) {
813 list_del(&lkb->lkb_astqueue);
816 spin_unlock(&proc->asts_spin);
/* The copy to user space happens after asts_spin has been released. */
818 ua = (struct dlm_user_args *)lkb->lkb_astparam;
819 error = copy_result_to_user(ua,
820 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
821 type, bmode, buf, count);
823 /* removes reference for the proc->asts lists added by
824 dlm_user_add_ast() and may result in the lkb being freed */
/* Poll handler for a lockspace device.

   file - open device whose private_data is the dlm_user_proc
   wait - poll table; the caller is registered on proc->wait

   Reports POLLIN | POLLRDNORM when proc->asts is non-empty, checked under
   proc->asts_spin.  The value returned for an empty list is not visible
   here. */
831 static unsigned int device_poll(struct file *file, poll_table *wait)
833 struct dlm_user_proc *proc = file->private_data;
835 poll_wait(file, &proc->wait, wait);
837 spin_lock(&proc->asts_spin);
838 if (!list_empty(&proc->asts)) {
839 spin_unlock(&proc->asts_spin);
840 return POLLIN | POLLRDNORM;
842 spin_unlock(&proc->asts_spin);
/* Open handler for the control device.  No per-open state is kept:
   private_data is cleared, so device_write() sees a NULL proc for writes
   that arrive through the control device. */
846 static int ctl_device_open(struct inode *inode, struct file *file)
848 file->private_data = NULL;
/* Release handler for the control device; installed as .release in
   ctl_device_fops.  Its body is not visible here. */
852 static int ctl_device_close(struct inode *inode, struct file *file)
/* File operations for per-lockspace devices; create_misc_device() installs
   this table on each lockspace's misc device. */
857 static const struct file_operations device_fops = {
859 .release = device_close,
861 .write = device_write,
863 .owner = THIS_MODULE,
/* File operations for the control device; installed by dlm_user_init().
   Writes share device_write() with the lockspace devices. */
866 static const struct file_operations ctl_device_fops = {
867 .open = ctl_device_open,
868 .release = ctl_device_close,
870 .write = device_write,
871 .owner = THIS_MODULE,
/* Set up and register the control misc device, named "dlm-control", with
   ctl_device_fops and a dynamically assigned minor.  The log message below
   reports a failed misc_register(). */
874 int dlm_user_init(void)
878 ctl_device.name = "dlm-control";
879 ctl_device.fops = &ctl_device_fops;
880 ctl_device.minor = MISC_DYNAMIC_MINOR;
882 error = misc_register(&ctl_device);
884 log_print("misc_register failed for control device");
/* Deregister the control misc device registered by dlm_user_init(). */
889 void dlm_user_exit(void)
891 misc_deregister(&ctl_device);